Example #1
  def _note_metrics(labels, predictions):
    """A pyfunc that wraps a call to precision_recall_f1_overlap."""
    est_sequence = pianoroll_to_note_sequence(
        predictions,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_duration_ms=hparams.min_duration_ms)

    ref_sequence = pianoroll_to_note_sequence(
        labels,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_duration_ms=hparams.min_duration_ms)

    est_intervals, est_pitches = sequence_to_valued_intervals(
        est_sequence, hparams.min_duration_ms)
    ref_intervals, ref_pitches = sequence_to_valued_intervals(
        ref_sequence, hparams.min_duration_ms)

    if est_intervals.size == 0 or ref_intervals.size == 0:
      return 0., 0., 0.
    note_precision, note_recall, note_f1, _ = precision_recall_f1_overlap(
        ref_intervals,
        pretty_midi.note_number_to_hz(ref_pitches),
        est_intervals,
        pretty_midi.note_number_to_hz(est_pitches),
        offset_ratio=offset_ratio)

    return note_precision, note_recall, note_f1
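The docstring marks this as a pyfunc. A minimal sketch of how such a metric function might be wired into a TF1 graph is shown below; the tf.py_func wrapping, tensor names, and output dtypes are assumptions for illustration, not taken from the original file.

# Hedged sketch: expose _note_metrics to the graph via tf.py_func (TF1-style).
# `labels` and `predictions` are assumed to be pianoroll tensors already in scope.
note_precision, note_recall, note_f1 = tf.py_func(
    _note_metrics,
    inp=[labels, predictions],
    Tout=[tf.float64, tf.float64, tf.float64],
    stateful=False,
    name='note_metrics')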
Example #3
def get_f1_score_notes(ref_intervals, ref_pitches, est_intervals, est_pitches):

    precision, recall, f1, _ = mir_eval.transcription.precision_recall_f1_overlap(
        ref_intervals,
        pretty_midi.note_number_to_hz(ref_pitches),
        est_intervals,
        pretty_midi.note_number_to_hz(est_pitches),
        offset_ratio=None)

    return precision, recall, f1
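A minimal usage sketch for get_f1_score_notes; the interval and pitch values below are illustrative. mir_eval expects [onset, offset] pairs in seconds, and the helper converts MIDI pitches to Hz itself.

import numpy as np

ref_intervals = np.array([[0.0, 0.5], [0.5, 1.0]])     # reference notes, seconds
ref_pitches = np.array([60, 64])                       # MIDI note numbers (C4, E4)
est_intervals = np.array([[0.02, 0.48], [0.55, 1.0]])  # estimated notes
est_pitches = np.array([60, 64])

precision, recall, f1 = get_f1_score_notes(ref_intervals, ref_pitches,
                                           est_intervals, est_pitches)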
Example #4
def magenta_note_eval(pred_seq,
                      label_seq,
                      onset_tolerance=0.05,
                      restrict_to_pitch=None):
    note_density = len(pred_seq.notes) / pred_seq.total_time

    est_intervals, est_pitches, est_velocities = (sequence_to_valued_intervals(
        pred_seq, restrict_to_pitch=restrict_to_pitch))

    ref_intervals, ref_pitches, ref_velocities = (sequence_to_valued_intervals(
        label_seq, restrict_to_pitch=restrict_to_pitch))

    note_precision, note_recall, note_f1, _ = (
        mir_eval.transcription.precision_recall_f1_overlap(
            ref_intervals,
            pretty_midi.note_number_to_hz(ref_pitches),
            est_intervals,
            pretty_midi.note_number_to_hz(est_pitches),
            onset_tolerance=onset_tolerance,
            offset_ratio=None))
    '''
    (note_with_velocity_precision, note_with_velocity_recall,
    note_with_velocity_f1, _) = (
        mir_eval.transcription_velocity.precision_recall_f1_overlap(
            ref_intervals=ref_intervals,
            ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
            ref_velocities=ref_velocities,
            est_intervals=est_intervals,
            est_pitches=pretty_midi.note_number_to_hz(est_pitches),
            est_velocities=est_velocities,
            offset_ratio=None))
    '''
    (note_with_offsets_precision, note_with_offsets_recall,
     note_with_offsets_f1,
     _) = (mir_eval.transcription.precision_recall_f1_overlap(
         ref_intervals,
         pretty_midi.note_number_to_hz(ref_pitches),
         est_intervals,
         pretty_midi.note_number_to_hz(est_pitches),
         onset_tolerance=onset_tolerance))
    '''
    (note_with_offsets_velocity_precision, note_with_offsets_velocity_recall,
    note_with_offsets_velocity_f1, _) = (
        mir_eval.transcription_velocity.precision_recall_f1_overlap(
            ref_intervals=ref_intervals,
            ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
            ref_velocities=ref_velocities,
            est_intervals=est_intervals,
            est_pitches=pretty_midi.note_number_to_hz(est_pitches),
            est_velocities=est_velocities))
    '''
    return (note_precision, note_recall, note_f1, note_with_offsets_precision,
            note_with_offsets_recall, note_with_offsets_f1)
Example #5
def plot_piano_roll(pm, start_pitch, end_pitch, fs=100):
    # Use librosa's specshow function for displaying the piano roll
    piano_roll = pm.get_piano_roll(fs)
    #print(piano_roll)
    plt.figure(figsize=(8, 4))
    ld.specshow(piano_roll[start_pitch:end_pitch],
                hop_length=1,
                sr=fs,
                x_axis='time',
                y_axis='cqt_note',
                fmin=pretty_midi.note_number_to_hz(start_pitch),
                fmax=pretty_midi.note_number_to_hz(end_pitch))
    plt.colorbar()
    plt.title('Piano roll viz (notes)')
    plt.show()
def ex3():
    global sample_rate, sd
    midi_data = pretty_midi.PrettyMIDI("01-Recorded MIDI MIDI 001.mid") ## black sabbath - i
    # midi_data = pretty_midi.PrettyMIDI("16752.mid") ## beethoven moonlight sonata - kinda long
    midi_list = []

    ## parsing taken from the first example here
    ## https://www.audiolabs-erlangen.de/resources/MIR/FMP/C1/C1S2_MIDI.html
    for instrument in midi_data.instruments:
        for note in instrument.notes:
            start = note.start
            end = note.end
            pitch = note.pitch
            velocity = note.velocity
            midi_list.append([start, end-start, pitch, velocity])
            
    time_frame = midi_data.get_end_time() ## in seconds
    samples = np.zeros(shape=(int(time_frame * sample_rate),), dtype=np.float64)  # start from silence; np.empty would leave garbage values
    
    for tone in midi_list:
        x, _ = sine(pretty_midi.note_number_to_hz(tone[2]), 0, 5e3 * (tone[3] / 127), tone[0], tone[1])
        start_time = int(tone[0] * sample_rate)
        samples[start_time:(start_time+len(x))] = samples[start_time:(start_time+len(x))] + window(x)

    sd.default.samplerate = sample_rate
    wav_wave = np.array(samples, dtype=np.int16)
    sd.play(wav_wave, blocking=True)
    sd.stop()
def create_piano_roll(noteList, instrument, min_note_length):
    inst = pm.Instrument(0, is_drum=False, name=instrument)
    start = 0
    notelength = min_note_length   
    for i in range(len(noteList)):
        if noteList[i] == 128:
            start += notelength
        else:
            inst.notes.append(pm.Note(100, noteList[i], start, start+notelength))
            start += notelength
    if len(inst.notes) != 0:
        librosa.display.specshow(inst.get_piano_roll(100)[40:89], hop_length=1, sr=500, x_axis='time',
        y_axis='cqt_note', fmin=pm.note_number_to_hz(40), fmax=pm.note_number_to_hz(88))
    else: 
        print('No instrument detected')
    return inst
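A hedged usage sketch for create_piano_roll above; the note values are illustrative. Per the loop, 128 acts as a rest marker and every other value is a MIDI pitch held for min_note_length seconds (the `pm` alias for pretty_midi comes from the snippet's module).

melody = [60, 62, 64, 128, 65, 67]  # C D E, rest, F G
inst = create_piano_roll(melody, instrument='piano', min_note_length=0.25)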
Example #8
    def assign_notes(self, id, note, on_or_off):
        note_0_ind = 2 * id
        note_1_ind = (2 * id) + 1
        note_0 = self.voice_notes[note_0_ind]
        note_1 = self.voice_notes[note_1_ind]

        if on_or_off:  # On
            if note_0 and note_1:
                if note_0 != note_1:
                    return  # If two different notes are held, throw out
                else:
                    if note > note_0:
                        self.voice_notes[note_1_ind] = note
                    else:
                        self.voice_notes[note_0_ind] = note
            else:
                self.voice_notes[note_0_ind] = note
                self.voice_notes[note_1_ind] = note
        else:  # Off
            if note_0 == note and note_1 == note:
                self.voice_notes[note_0_ind] = None
                self.voice_notes[note_1_ind] = None
            elif note_0 == note:
                self.voice_notes[note_0_ind] = note_1
            elif note_1 == note:
                self.voice_notes[note_1_ind] = note_0

        # Avoid shadowing the `note` argument while updating the synth voices.
        for i, held_note in enumerate(self.voice_notes):
            if held_note:
                self.synths[i].freq = note_number_to_hz(held_note)
                self.synths[i].play()
            else:
                self.synths[i].stop()
def plot_piano_roll(pm, start_pitch, end_pitch, fs=100):
    librosa.display.specshow(pm.get_piano_roll(fs)[start_pitch:end_pitch],
                             hop_length=1,
                             sr=fs,
                             x_axis='time',
                             y_axis='cqt_note',
                             fmin=pretty_midi.note_number_to_hz(start_pitch))
Example #10
def plot_piano_roll(pm, start_pitch, end_pitch, fs=fs):
    # Use librosa's specshow function for displaying the piano roll
    librosa.display.specshow(pm.get_piano_roll(fs)[start_pitch:end_pitch],
                             hop_length=1,
                             sr=fs,
                             x_axis='time',
                             y_axis='cqt_note',
                             fmin=pretty_midi.note_number_to_hz(start_pitch))
Example #11
def save_roll(x, step):
    fig = plt.figure(figsize=(72, 24))
    librosa.display.specshow(x,
                             x_axis='time',
                             hop_length=1,
                             sr=96,
                             fmin=pm.note_number_to_hz(12))
    plt.title('{}'.format(step))
    fig.savefig('Samples/{}.png'.format(step))
    plt.close(fig)
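A hedged usage sketch for save_roll; the array shape is an assumption inferred from the sr=96 / fmin=note-12 arguments, and the 'Samples/' directory is assumed to exist.

import numpy as np

roll = (np.random.rand(84, 96 * 4) > 0.97).astype(float)  # illustrative roll, ~4 s at 96 frames/s
save_roll(roll, step=0)  # writes Samples/0.png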
Example #12
def plot_piano_roll(pm, start_pitch=0, end_pitch=127, fs=50):
    # Here `pm` is already a piano-roll array (pitches x frames), not a PrettyMIDI object.
    # Create the figure before drawing; otherwise plt.show() would display a fresh, empty one.
    plt.figure(figsize=(8, 4))
    librosa.display.specshow(pm[start_pitch:end_pitch],
                             hop_length=1,
                             sr=fs,
                             x_axis='time',
                             y_axis='cqt_note',
                             fmin=pretty_midi.note_number_to_hz(start_pitch))
    plt.show()
Example #13
def plot_piano_roll(pm, start_pitch=0, end_pitch=127, fs=120):
    # Use librosa's specshow function for displaying the piano roll
    return librosa.display.specshow(
        pm.get_piano_roll(fs)[start_pitch:end_pitch],
        hop_length=1,
        sr=fs,
        x_axis="time",
        y_axis="cqt_note",
        fmin=pretty_midi.note_number_to_hz(start_pitch),
    )
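A minimal usage sketch; 'example.mid' is a placeholder path.

import matplotlib.pyplot as plt
import pretty_midi

pm = pretty_midi.PrettyMIDI('example.mid')  # placeholder path
plt.figure(figsize=(8, 4))
plot_piano_roll(pm, start_pitch=36, end_pitch=96, fs=100)
plt.show()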
Example #14
def load_midi(midi_path):
    #read MIDI notes
    note, dur, pitch = [], [], []
    midi_data = pretty_midi.PrettyMIDI(midi_path)
    for instrument in midi_data.instruments:
        for n in instrument.notes:
            # note.velocity
            note.append(pretty_midi.note_number_to_hz(n.pitch))
            dur.append(int((n.end - n.start) * 220))
            pitch.append(n.pitch)

    return note, dur, pitch
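A hedged usage sketch; 'example.mid' is a placeholder path. Note that durations come back as integer multiples of 1/220 s, per the scaling above.

freqs, durations, pitches = load_midi('example.mid')
print(freqs[0], durations[0], pitches[0])  # frequency in Hz, duration in 1/220 s ticks, MIDI pitch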
Example #15
 def assign_notes_to_voices(self):
     for i in range(len(self.current_notes)):
         singer_id = i + 1
         if singer_id in self.id_note_map.keys():
             if self.id_note_map[singer_id] != self.current_notes[i]:
                 self.id_note_map[singer_id] = None
                 self.synths[singer_id - 1].stop()
                 self.id_note_map[singer_id] = self.current_notes[i]
                 self.synths[singer_id - 1].freq = note_number_to_hz(
                     self.id_note_map[singer_id])
                 self.synths[singer_id - 1].play()
     for singer_id in range(len(self.current_notes) + 1, 5):
         if singer_id in self.id_note_map.keys():
             self.id_note_map[singer_id] = None
             self.synths[singer_id - 1].stop()
Example #16
def generate_piano_roll(score, title, path, start_pitch, end_pitch, width,
                        height, fs):
    """Save a piano roll image."""

    plt.figure(figsize=(width, height))
    plt.title(title)

    librosa.display.specshow(score.get_piano_roll(fs)[start_pitch:end_pitch],
                             hop_length=1,
                             sr=fs,
                             x_axis='time',
                             y_axis='cqt_note',
                             fmin=midi.note_number_to_hz(start_pitch))

    plt.tight_layout()
    plt.savefig(path)
def get_df_mid(midi_data):
    """Build a per-note DataFrame from a PrettyMIDI object."""
    midi_list = []
    # Temporary change: pick the tempo candidate with the lowest estimated probability.
    tempi, probabilities = midi_data.estimate_tempi()
    tempo = tempi[probabilities.argmin()]
    for instrument in midi_data.instruments:
        for note in instrument.notes:
            start = note.start
            end = note.end
            midi = note.pitch
            pitch = pretty_midi.note_number_to_hz(midi)
            duration = pretty_midi.Note.get_duration(note)
            midi_list.append([start, end, duration, midi, pitch, tempo])
    mid_df = pd.DataFrame(
        midi_list,
        columns=['start', 'end', 'duration', 'midi', 'pitch', 'tempo'])
    return mid_df
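Hypothetical usage of get_df_mid; the path is a placeholder and pandas is assumed to be imported as pd in the snippet's module. The result has one row per note with columns start, end, duration, midi, pitch, tempo.

midi_data = pretty_midi.PrettyMIDI('example.mid')  # placeholder path
mid_df = get_df_mid(midi_data)
print(mid_df.head())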
def get_piano_roll_matrix(midi_data,
                          start_pitch,
                          end_pitch,
                          fs=50,
                          draw=False):
    # roll = midi_data.get_piano_roll(fs)[start_pitch:end_pitch]
    matrix = midi_data.get_piano_roll(fs)[:, :10000]
    # print(matrix[:, 30:40])
    # print(matrix.shape)

    if draw:
        librosa.display.specshow(
            matrix,
            hop_length=1,
            sr=fs,
            x_axis='time',
            y_axis='cqt_note',
            fmin=pretty_midi.note_number_to_hz(start_pitch))

    return np.array(matrix).flatten()
Example #19
def convert_midi_to_events(midi_path: str, settings: Dict[str,
                                                          Any]) -> List[Event]:
    """
    Collect sound events (loosely speaking, played notes) from a MIDI file.

    :param midi_path:
        path to source MIDI file
    :param settings:
        global settings for the output track
    :return:
        sound events
    """
    midi_settings = settings['midi']
    if 'track_name_to_instrument' in midi_settings:
        instruments_mapping = midi_settings['track_name_to_instrument']
        effects_mapping = midi_settings.get('track_name_to_effects', {})
        key_fn = lambda instrument: instrument.name
    elif 'program_to_instrument' in midi_settings:
        instruments_mapping = midi_settings['program_to_instrument']
        effects_mapping = midi_settings.get('program_to_effects', {})
        key_fn = lambda instrument: instrument.program
    else:
        raise RuntimeError("MIDI config file lacks required sections.")

    midi_data = pretty_midi.PrettyMIDI(midi_path)
    events = []
    for pretty_midi_instrument in midi_data.instruments:
        key = key_fn(pretty_midi_instrument)
        sinethesizer_instrument = instruments_mapping.get(key)
        if sinethesizer_instrument is None:
            continue
        for note in pretty_midi_instrument.notes:
            event = Event(instrument=sinethesizer_instrument,
                          start_time=note.start,
                          duration=note.end - note.start,
                          frequency=pretty_midi.note_number_to_hz(note.pitch),
                          velocity=note.velocity / MAX_MIDI_VALUE,
                          effects=effects_mapping.get(key, ''),
                          frame_rate=settings['frame_rate'])
            events.append(event)
    return events
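A hedged sketch of the settings structure this function reads; only the keys are taken from the code above, while the instrument/effect names, frame rate, and path are placeholders.

settings = {
    'frame_rate': 44100,
    'midi': {
        # alternatively: 'program_to_instrument' / 'program_to_effects'
        'track_name_to_instrument': {'Lead': 'sine_lead'},
        'track_name_to_effects': {'Lead': 'overdrive'},
    },
}
events = convert_midi_to_events('example.mid', settings)  # placeholder path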
Example #20
def numpy_to_midi(sample_roll, display=False, interpolation=False):
    music = pretty_midi.PrettyMIDI()
    piano_program = pretty_midi.instrument_name_to_program(
        'Acoustic Grand Piano')
    piano = pretty_midi.Instrument(program=piano_program)
    t = 0
    for i in sample_roll:
        if 'torch' in str(type(i)):
            pitch = int(i.max(0)[1])
        else:
            pitch = int(np.argmax(i))
        if pitch < 128:
            note = pretty_midi.Note(velocity=100,
                                    pitch=pitch,
                                    start=t,
                                    end=t + 1 / 16)
            t += 1 / 16
            piano.notes.append(note)
        elif pitch == 128:
            if len(piano.notes) > 0:
                note = piano.notes.pop()
            else:
                p = np.random.randint(60, 72)
                note = pretty_midi.Note(velocity=100,
                                        pitch=int(p),
                                        start=0,
                                        end=t)
            note = pretty_midi.Note(velocity=100,
                                    pitch=note.pitch,
                                    start=note.start,
                                    end=note.end + 1 / 16)
            piano.notes.append(note)
            t += 1 / 16
        elif pitch == 129:
            t += 1 / 16
    music.instruments.append(piano)
    if display:
        plt.figure(figsize=(30, 10))
        start, end = bound(m=music.get_piano_roll(100))
        ax = plt.gca()
        ax.tick_params(axis='both', which='major', labelsize=44)
        #         ax.tick_params(axis = 'both', which = 'minor', labelsize = 20)
        if interpolation:
            shape = music.get_piano_roll(100)[start + 1:end - 1].shape
            if shape[1] == 9600:
                plt.axvspan(0, 16, facecolor="#6600cc", alpha=0.5)
                plt.axvspan(16, 32, facecolor="#8510b3", alpha=0.5)
                plt.axvspan(32, 48, facecolor="#a3209a", alpha=0.5)
                plt.axvspan(48, 64, facecolor="#c23082", alpha=0.5)
                plt.axvspan(64, 80, facecolor="#e04069", alpha=0.5)
                plt.axvspan(80, 96, facecolor="#ff5050", alpha=0.5)
            else:
                plt.axvspan(0, 2, facecolor="#6600cc", alpha=0.5)
                plt.axvspan(2, 4, facecolor="#8510b3", alpha=0.5)
                plt.axvspan(4, 6, facecolor="#a3209a", alpha=0.5)
                plt.axvspan(6, 8, facecolor="#c23082", alpha=0.5)
                plt.axvspan(8, 10, facecolor="#e04069", alpha=0.5)
                plt.axvspan(10, 12, facecolor="#ff5050", alpha=0.5)
            cmap = matplotlib.colors.ListedColormap(['white', "black"])
            librosa.display.specshow(music.get_piano_roll(100)[start:end],
                                     hop_length=1,
                                     sr=100,
                                     x_axis='time',
                                     y_axis='cqt_note',
                                     fmin=pretty_midi.note_number_to_hz(48),
                                     cmap=cmap,
                                     shading='flat')
        else:
            librosa.display.specshow(music.get_piano_roll(100)[start:end],
                                     hop_length=1,
                                     sr=100,
                                     x_axis='time',
                                     y_axis='cqt_note',
                                     fmin=pretty_midi.note_number_to_hz(48),
                                     cmap="inferno",
                                     shading='flat')
        plt.xlabel('time(s)', fontsize=48)
        plt.ylabel('pitch', fontsize=48)
        ax.minorticks_off()
        plt.show()

    return music.synthesize(wave=scipy.signal.square)
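A hedged sketch of the expected input, inferred from the branches above: each row of sample_roll is an activation over 130 classes, where 0-127 start a 1/16-step note at that MIDI pitch, 128 sustains the previous note, and 129 is a rest. The values below are illustrative only.

import numpy as np

roll = np.zeros((32, 130))
roll[0, 60] = 1       # start C4
roll[1:8, 128] = 1    # sustain it for seven more 1/16 steps
roll[8:16, 129] = 1   # rest
roll[16, 64] = 1      # start E4
roll[17:, 128] = 1    # sustain to the end
audio = numpy_to_midi(roll, display=False)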
Example #21
 def get_hz(self):
     return round(pretty_midi.note_number_to_hz(self.note_number),
                  settings.initial_octave)
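For reference, pretty_midi.note_number_to_hz maps MIDI note 69 (A4) to 440 Hz. Also note that the second argument of round() is a number of decimal places, so settings.initial_octave is used here as a rounding precision rather than an octave index.

pretty_midi.note_number_to_hz(69)  # -> 440.0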
Example #22
def plot_onsets(x,
                fs,
                hop,
                c,
                thresh,
                onset_times,
                file_name,
                midi_onsets,
                f_est,
                tolerance=0.02):
    # --
    # awesome plot

    # time vector
    t = np.arange(0, len(x) / fs, 1 / fs)

    # frame vector
    time_frames = (np.arange(0, len(x) - hop, hop) + hop / 2) / fs

    # plot
    plt.figure(3, figsize=(8, 4))
    plt.plot(t, x / max(x), label='audiofile', linewidth=1)
    plt.plot(time_frames, c / max(c), label='complex domain', linewidth=1)
    plt.plot(time_frames,
             thresh / max(c),
             label='adaptive threshold',
             linewidth=1)

    # annotations midi labels
    for i, mid in enumerate(midi_onsets):

        # draw vertical lines
        if i == 0:
            # put label to legend
            plt.axvline(x=mid[3],
                        dashes=(2, 2),
                        color='k',
                        label="midi-labels")
            #plt.text(x=mid[3], y=0.9, s=note_number_to_name(mid[1]), color='k', fontweight='semibold')
            plt.text(x=mid[3],
                     y=0.9,
                     s=round(note_number_to_hz(mid[1]), 1),
                     color='k',
                     fontweight='semibold')

        else:
            plt.axvline(x=float(mid[3]), dashes=(2, 2), color='k')
            #plt.text(x=mid[3], y=0.9, s=note_number_to_name(mid[1]), color='k', fontweight='semibold')
            plt.text(x=mid[3],
                     y=0.9,
                     s=round(note_number_to_hz(mid[1]), 1),
                     color='k',
                     fontweight='semibold')

    # tolerance band of each label
    neg_label_tolerance = midi_onsets[:, 3] - tolerance
    pos_label_tolerance = midi_onsets[:, 3] + tolerance
    green_label = False
    red_label = False

    # annotations targets
    for i, a in enumerate(onset_times):

        # decide if correct or not -> color
        is_tp = np.sum(
            np.logical_and(neg_label_tolerance < a, pos_label_tolerance > a))

        # draw vertical lines
        if is_tp == 1:

            if green_label == False:
                # put label
                green_label = True
                plt.axvline(x=float(a),
                            dashes=(5, 1),
                            color='g',
                            label="targets TP")
                plt.text(x=float(a),
                         y=1,
                         s=f_est[i],
                         color='g',
                         fontweight='semibold')
            else:
                plt.axvline(x=float(a), dashes=(5, 1), color='g')
                plt.text(x=float(a),
                         y=1,
                         s=f_est[i],
                         color='g',
                         fontweight='semibold')

        else:
            if red_label == False:
                # put label
                red_label = True
                plt.axvline(x=float(a),
                            dashes=(5, 1),
                            color='r',
                            label="targets FP")
                plt.text(x=float(a),
                         y=0.8,
                         s=f_est[i],
                         color='r',
                         fontweight='semibold')
            else:
                plt.axvline(x=float(a), dashes=(5, 1), color='r')
                plt.text(x=float(a),
                         y=0.8,
                         s=f_est[i],
                         color='r',
                         fontweight='semibold')

    plt.title(file_name)
    plt.ylabel('magnitude')
    plt.xlabel('time [s]')

    plt.grid()
    plt.legend(prop={'size': 7})

    #plt.savefig('class' + str(r) + '.png', dpi=150)
    plt.show()
Example #23
def addSweep(extension):
    # Load configuration

    if os.path.exists('results/sweep_data.npz'):
        tmp = np.load('results/sweep_data.npz')
        current_extension = tmp['arr_0']
    else:
        current_extension = []

    if extension in current_extension:
        print("< Extension already in archive")
    else:
        config = load_configurations(extension)
        assert(extension==config['extension'])


        config['temporal_bias'] = 0
        config['augmentation_factor'] = 7

        ensembling_factor = 0.25
        suppression_field = 9

        # Load out-of-sample data
        print('>> Load Dataset...')
        test_files = np.load('models/' + config['extension'] + '_test_files.npy')
        x_out_raw, _, y_out_raw, stretch_factor_out, file_list_out_raw = generateSplitDataset(test_files, config, infer=True, labelNoise=False)
        print("---", len(test_files), "---")


        print("---", config['extension'], "---")

        # Force CPU execution; `os` is already imported above.
        os.environ["CUDA_VISIBLE_DEVICES"] = ""

        new_results = np.zeros([3,3])
        with tf.Session() as sess:
            # Restore model
            softModel = SoftNetwork(config)
            softModel.restore(sess)

            for selection_channel in range(0,3):

                x_out = np.copy(x_out_raw)
                y_out = np.copy(y_out_raw)
                file_list_out = np.copy(file_list_out_raw)


                # Select channel
                idx_channel = [0, 1, 2]
                idx_channel.remove(selection_channel)
                y_out[:, idx_channel, :] = 0

                # Single extract score
                pp = softModel.predict(sess, x_out)
                pp[:, idx_channel, :] = 0
                _, _ = lp.localizationPlot(pp, y_out, n_samples=20, dist_threshold=config['tolerence'], factor=1,
                                           bias=config['temporal_bias'], decimals=7)
                plt.close()

                # Ensembling score
                pp_trans = np.transpose(pp.reshape([pp.shape[0] // config['augmentation_factor'], config['augmentation_factor'], pp.shape[1], pp.shape[2]]), [1, 0, 2, 3])
                pp_ensemble = softModel.FastEnsembling(pp_trans, stretch_factor_out, ensembling_factor, suppression_field)

                plt.figure()
                _, _ = lp.localizationPlot(pp_ensemble, y_out[::config['augmentation_factor'], :, :], n_samples=10, dist_threshold=config['tolerence'],
                                           factor=1, bias=config['temporal_bias'], decimals=7)
                plt.close()

                _start_extract = 16
                y_ensemble = y_out[::config['augmentation_factor'], :, :]
                file_list_out = file_list_out[::config['augmentation_factor']]

                y_pasted = np.zeros([len(test_files), pp_ensemble.shape[1], 30000])
                pp_pasted = np.zeros([len(test_files), pp_ensemble.shape[1], 30000])
                ww = np.zeros([len(test_files), pp_ensemble.shape[1], 30000])
                file_out_unique = []
                previous_source = ""
                idx_source = -1
                for ii in range(len(file_list_out)):
                    if file_list_out[ii] == previous_source:
                        idx_start += int(config['split_step'] * 200)
                    else:
                        idx_start = 0
                        idx_source += 1
                        previous_source = file_list_out[ii]
                        file_out_unique.append(previous_source)

                    y_pasted[idx_source, :, idx_start:idx_start + y_ensemble[ii, :, _start_extract:].shape[1]] += y_ensemble[ii, :, _start_extract:]
                    pp_pasted[idx_source, :, idx_start:idx_start + pp_ensemble[ii, :, _start_extract:].shape[1]] += pp_ensemble[ii, :, _start_extract:]
                    ww[idx_source, :, idx_start:idx_start + pp_ensemble[ii, :, _start_extract:int(config['split_length'] * 200)+_start_extract].shape[1]] += 1

                # Normalize
                pp_final = pp_pasted[:, :, np.sum(ww, axis=(0, 1)) > 0] / ww[:, :, np.sum(ww, axis=(0, 1)) > 0]
                y_final = y_pasted[:, :, np.sum(ww, axis=(0, 1)) > 0] > 0

                # Load labels from file
                yy = np.zeros([pp_final.shape[0], pp_final.shape[1], pp_final.shape[2]])
                yy_list = []
                for jj in range(yy.shape[0]):
                    label_raw = np.array(parseXML(file_out_unique[jj].replace('audio', 'annotation_xml').replace('wav', 'xml')))
                    for kk in range(label_raw.shape[0]):
                        yy[jj, int(label_raw[kk, 1]), int(label_raw[kk, 0] * 200)] += 1

                    yy_list.append(label_raw[np.logical_not([x in idx_channel for x in label_raw[:,1]]),:])

                yy[:, idx_channel, :] = 0


                # Check alignment
                plt.figure()
                plt.plot(yy[0, 0, :] - y_final[0, 0, :])
                plt.close('all')

                # Final prediction cleaning
                pp_final = pp_pasted[:, :, np.sum(ww, axis=(0, 1)) > 0] / ww[:, :, np.sum(ww, axis=(0, 1)) > 0]
                pp_final_cleaning = np.zeros([pp_final.shape[0], pp_final.shape[1], pp_final.shape[2]])
                for ii in range(pp_final_cleaning.shape[0]):
                    for jj in range(pp_final_cleaning.shape[1]):
                        for tt in range(pp_final_cleaning.shape[2]):
                            if pp_final[ii, jj, tt] > 0:
                                if np.sum(pp_final[ii, jj, tt:tt + suppression_field]) >= 0.50:
                                    pp_final_cleaning[ii, jj, tt] = 1
                                    pp_final[ii, jj, tt:tt + suppression_field] = 0

                # Final score computation
                plt.figure()
                fig, _ = lp.localizationPlot(pp_final_cleaning[:, :, :], yy[:, :, :], n_samples=pp_final_cleaning.shape[0],
                                             dist_threshold=config['tolerence'],
                                             factor=1, bias=config['temporal_bias'], decimals=7)
                plt.close()


                pp_list = []
                for ii in range(pp_final.shape[0]):
                    triggers = np.zeros([0,2])
                    for jj in range(pp_final.shape[1]):
                        list_hits = np.where(pp_final_cleaning[ii,jj])[0]/200
                        triggers = np.concatenate([triggers, np.concatenate([list_hits[:,np.newaxis],np.array([jj]*len(list_hits))[:,np.newaxis]],axis=1)])
                    pp_list.append(triggers)

                fig, _ = lp.localizationPlotList(pp_list, yy_list, decimals=7, bias= 0.000,  n_samples = 20, dist_threshold=0.050)
                plt.savefig('plt/inference/' + config['extension']+ "_" + str(selection_channel))
                plt.close()

                f1_list = []
                pre_list = []
                rec_list = []
                for kk in range(0,len(yy_list)):
                    pre, rec, f1, _ = (
                        mir_eval.transcription.precision_recall_f1_overlap(
                            np.concatenate([np.array([max(x,0) for x in yy_list[kk][:, 0]])[:,np.newaxis], yy_list[kk][:, 0:1] + 1], axis=1),
                            pretty_midi.note_number_to_hz(yy_list[kk][:, 1]),
                            np.concatenate([np.array([max(x,0) for x in pp_list[kk][:, 0]])[:,np.newaxis], pp_list[kk][:, 0:1] + 1], axis=1),
                            pretty_midi.note_number_to_hz(pp_list[kk][:, 1]),
                            offset_ratio=None))
                    f1_list.append(f1)
                    pre_list.append(pre)
                    rec_list.append(rec)

                print(np.mean(f1_list), np.mean(pre_list), np.mean(rec_list))

                new_results[selection_channel,:] = np.array([np.mean(f1_list), np.mean(pre_list), np.mean(rec_list)])

                print("---", config['extension'], "---", selection_channel, "---")

        softModel.reset()

        # Reload in case other update occurred in the mean-time
        if os.path.exists('results/sweep_data.npz'):
            tmp = np.load('results/sweep_data.npz')
            current_extension = tmp['arr_0']
            current_results = tmp['arr_1']
            current = current_extension.tolist()
            current.append(extension)
            np.savez('results/sweep_data.npz', current, np.concatenate([current_results,new_results[np.newaxis, :, :]],axis=0))
        else:
            np.savez('results/sweep_data.npz', current_extension.tolist(), new_results[np.newaxis, :, :])

    # create updated image
    import sweepVisualization
Example #24
def score_sequence(session, global_step_increment, summary_op, summary_writer,
                   metrics_to_updates, metric_note_precision,
                   metric_note_recall, metric_note_f1,
                   metric_note_precision_with_offsets,
                   metric_note_recall_with_offsets,
                   metric_note_f1_with_offsets, metric_frame_labels,
                   metric_frame_predictions, frame_labels, sequence_prediction,
                   frames_per_second, note_sequence_str_label, min_duration_ms,
                   sequence_id):
    """Calculate metrics on the inferred sequence."""
    est_intervals, est_pitches = sequence_to_valued_intervals(
        sequence_prediction, min_duration_ms=min_duration_ms)

    sequence_label = music_pb2.NoteSequence.FromString(note_sequence_str_label)
    ref_intervals, ref_pitches = sequence_to_valued_intervals(
        sequence_label, min_duration_ms=min_duration_ms)

    sequence_note_precision, sequence_note_recall, sequence_note_f1, _ = (
        mir_eval.transcription.precision_recall_f1_overlap(
            ref_intervals,
            pretty_midi.note_number_to_hz(ref_pitches),
            est_intervals,
            pretty_midi.note_number_to_hz(est_pitches),
            offset_ratio=None))

    (sequence_note_precision_with_offsets, sequence_note_recall_with_offsets,
     sequence_note_f1_with_offsets,
     _) = (mir_eval.transcription.precision_recall_f1_overlap(
         ref_intervals, pretty_midi.note_number_to_hz(ref_pitches),
         est_intervals, pretty_midi.note_number_to_hz(est_pitches)))

    frame_predictions = sequences_lib.sequence_to_pianoroll(
        sequence_prediction,
        frames_per_second=frames_per_second,
        min_pitch=constants.MIN_MIDI_PITCH,
        max_pitch=constants.MAX_MIDI_PITCH).active

    if frame_predictions.shape[0] < frame_labels.shape[0]:
        # Pad transcribed frames with silence.
        pad_length = frame_labels.shape[0] - frame_predictions.shape[0]
        frame_predictions = np.pad(frame_predictions, [(0, pad_length),
                                                       (0, 0)], 'constant')
    elif frame_predictions.shape[0] > frame_labels.shape[0]:
        # Truncate transcribed frames.
        frame_predictions = frame_predictions[:frame_labels.shape[0], :]

    global_step, _ = session.run(
        [global_step_increment, metrics_to_updates], {
            metric_frame_predictions: frame_predictions,
            metric_frame_labels: frame_labels,
            metric_note_precision: sequence_note_precision,
            metric_note_recall: sequence_note_recall,
            metric_note_f1: sequence_note_f1,
            metric_note_precision_with_offsets:
            sequence_note_precision_with_offsets,
            metric_note_recall_with_offsets: sequence_note_recall_with_offsets,
            metric_note_f1_with_offsets: sequence_note_f1_with_offsets
        })
    # Running the summary op separately ensures that all of the metrics have been
    # updated before we try to query them.
    summary = session.run(summary_op)

    tf.logging.info('Writing score summary for %s: Step= %d, Note F1=%f',
                    sequence_id, global_step, sequence_note_f1)
    summary_writer.add_summary(summary, global_step)
    summary_writer.flush()

    return sequence_label
Example #25
def score_sequence(session, global_step_increment, metrics_to_updates,
                   metric_note_precision, metric_note_recall, metric_note_f1,
                   metric_note_precision_with_offsets,
                   metric_note_recall_with_offsets,
                   metric_note_f1_with_offsets,
                   metric_note_precision_with_offsets_velocity,
                   metric_note_recall_with_offsets_velocity,
                   metric_note_f1_with_offsets_velocity, metric_frame_labels,
                   metric_frame_predictions, frame_labels, sequence_prediction,
                   frames_per_second, sequence_label, sequence_id):
    """Calculate metrics on the inferred sequence."""
    est_intervals, est_pitches, est_velocities = sequence_to_valued_intervals(
        sequence_prediction)

    ref_intervals, ref_pitches, ref_velocities = sequence_to_valued_intervals(
        sequence_label)

    sequence_note_precision, sequence_note_recall, sequence_note_f1, _ = (
        mir_eval.transcription.precision_recall_f1_overlap(
            ref_intervals,
            pretty_midi.note_number_to_hz(ref_pitches),
            est_intervals,
            pretty_midi.note_number_to_hz(est_pitches),
            offset_ratio=None))

    (sequence_note_precision_with_offsets, sequence_note_recall_with_offsets,
     sequence_note_f1_with_offsets,
     _) = (mir_eval.transcription.precision_recall_f1_overlap(
         ref_intervals, pretty_midi.note_number_to_hz(ref_pitches),
         est_intervals, pretty_midi.note_number_to_hz(est_pitches)))

    (sequence_note_precision_with_offsets_velocity,
     sequence_note_recall_with_offsets_velocity,
     sequence_note_f1_with_offsets_velocity,
     _) = (mir_eval.transcription_velocity.precision_recall_f1_overlap(
         ref_intervals=ref_intervals,
         ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
         ref_velocities=ref_velocities,
         est_intervals=est_intervals,
         est_pitches=pretty_midi.note_number_to_hz(est_pitches),
         est_velocities=est_velocities))

    frame_predictions = sequences_lib.sequence_to_pianoroll(
        sequence_prediction,
        frames_per_second=frames_per_second,
        min_pitch=constants.MIN_MIDI_PITCH,
        max_pitch=constants.MAX_MIDI_PITCH).active

    if frame_predictions.shape[0] < frame_labels.shape[0]:
        # Pad transcribed frames with silence.
        pad_length = frame_labels.shape[0] - frame_predictions.shape[0]
        frame_predictions = np.pad(frame_predictions, [(0, pad_length),
                                                       (0, 0)], 'constant')
    elif frame_predictions.shape[0] > frame_labels.shape[0]:
        # Truncate transcribed frames.
        frame_predictions = frame_predictions[:frame_labels.shape[0], :]

    global_step, _ = session.run(
        [global_step_increment, metrics_to_updates], {
            metric_frame_predictions: frame_predictions,
            metric_frame_labels: frame_labels,
            metric_note_precision: sequence_note_precision,
            metric_note_recall: sequence_note_recall,
            metric_note_f1: sequence_note_f1,
            metric_note_precision_with_offsets:
                sequence_note_precision_with_offsets,
            metric_note_recall_with_offsets:
                sequence_note_recall_with_offsets,
            metric_note_f1_with_offsets:
                sequence_note_f1_with_offsets,
            metric_note_precision_with_offsets_velocity:
                sequence_note_precision_with_offsets_velocity,
            metric_note_recall_with_offsets_velocity:
                sequence_note_recall_with_offsets_velocity,
            metric_note_f1_with_offsets_velocity:
                sequence_note_f1_with_offsets_velocity,
        })

    tf.logging.info('Updating scores for %s: Step= %d, Note F1=%f',
                    sequence_id, global_step, sequence_note_f1)
Example #26
def _calculate_metrics_py(frame_probs,
                          onset_probs,
                          frame_predictions,
                          onset_predictions,
                          offset_predictions,
                          velocity_values,
                          sequence_label_str,
                          frame_labels,
                          sequence_id,
                          hparams,
                          min_pitch,
                          max_pitch,
                          onsets_only,
                          restrict_to_pitch=None):
    """Python logic for calculating metrics on a single example."""
    tf.logging.info('Calculating metrics for %s with length %d', sequence_id,
                    frame_labels.shape[0])

    sequence_prediction = infer_util.predict_sequence(
        frame_probs=frame_probs,
        onset_probs=onset_probs,
        frame_predictions=frame_predictions,
        onset_predictions=onset_predictions,
        offset_predictions=offset_predictions,
        velocity_values=velocity_values,
        min_pitch=min_pitch,
        hparams=hparams,
        onsets_only=onsets_only)

    sequence_label = music_pb2.NoteSequence.FromString(sequence_label_str)

    if hparams.backward_shift_amount_ms:
        def shift_notesequence(ns_time):
            return ns_time + hparams.backward_shift_amount_ms / 1000.

        shifted_sequence_label, skipped_notes = (
            sequences_lib.adjust_notesequence_times(sequence_label,
                                                    shift_notesequence))
        assert skipped_notes == 0
        sequence_label = shifted_sequence_label

    est_intervals, est_pitches, est_velocities = (
        sequence_to_valued_intervals(
            sequence_prediction, restrict_to_pitch=restrict_to_pitch))

    ref_intervals, ref_pitches, ref_velocities = (
        sequence_to_valued_intervals(
            sequence_label, restrict_to_pitch=restrict_to_pitch))

    processed_frame_predictions = sequences_lib.sequence_to_pianoroll(
        sequence_prediction,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_pitch=min_pitch,
        max_pitch=max_pitch).active

    if processed_frame_predictions.shape[0] < frame_labels.shape[0]:
        # Pad transcribed frames with silence.
        pad_length = frame_labels.shape[0] - processed_frame_predictions.shape[0]
        processed_frame_predictions = np.pad(processed_frame_predictions,
                                             [(0, pad_length), (0, 0)], 'constant')
    elif processed_frame_predictions.shape[0] > frame_labels.shape[0]:
        # Truncate transcribed frames.
        processed_frame_predictions = (
            processed_frame_predictions[:frame_labels.shape[0], :])

    if len(ref_pitches) == 0:
        tf.logging.info(
            'Reference pitches were length 0, returning empty metrics for %s:',
            sequence_id)
        return tuple([[]] * 12 + [processed_frame_predictions])

    note_precision, note_recall, note_f1, _ = (
        mir_eval.transcription.precision_recall_f1_overlap(
            ref_intervals,
            pretty_midi.note_number_to_hz(ref_pitches),
            est_intervals,
            pretty_midi.note_number_to_hz(est_pitches),
            offset_ratio=None))

    (note_with_velocity_precision, note_with_velocity_recall,
     note_with_velocity_f1, _) = (
        mir_eval.transcription_velocity.precision_recall_f1_overlap(
            ref_intervals=ref_intervals,
            ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
            ref_velocities=ref_velocities,
            est_intervals=est_intervals,
            est_pitches=pretty_midi.note_number_to_hz(est_pitches),
            est_velocities=est_velocities,
            offset_ratio=None))

    (note_with_offsets_precision, note_with_offsets_recall, note_with_offsets_f1,
     _) = (
        mir_eval.transcription.precision_recall_f1_overlap(
            ref_intervals, pretty_midi.note_number_to_hz(ref_pitches),
            est_intervals, pretty_midi.note_number_to_hz(est_pitches)))

    (note_with_offsets_velocity_precision, note_with_offsets_velocity_recall,
     note_with_offsets_velocity_f1, _) = (
        mir_eval.transcription_velocity.precision_recall_f1_overlap(
            ref_intervals=ref_intervals,
            ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
            ref_velocities=ref_velocities,
            est_intervals=est_intervals,
            est_pitches=pretty_midi.note_number_to_hz(est_pitches),
            est_velocities=est_velocities))

    tf.logging.info(
        'Metrics for %s: Note F1 %f, Note w/ velocity F1 %f, Note w/ offsets F1 %f, '
        'Note w/ offsets & velocity: %f', sequence_id, note_f1,
        note_with_velocity_f1, note_with_offsets_f1,
        note_with_offsets_velocity_f1)
    # Return 1-d tensors for the metrics
    return ([note_precision], [note_recall], [note_f1],
            [note_with_velocity_precision], [note_with_velocity_recall],
            [note_with_velocity_f1], [note_with_offsets_precision],
            [note_with_offsets_recall], [note_with_offsets_f1],
            [note_with_offsets_velocity_precision],
            [note_with_offsets_velocity_recall],
            [note_with_offsets_velocity_f1], [processed_frame_predictions])
Example #27
                                         bias=-0.025,
                                         n_samples=1)
        plt.savefig('infer/images/' + version + "_" +
                    infer_files[0].split("/")[-1].replace('.wav', '.png'))
        plt.close('all')

        np.save(
            'infer/' + version + "_" +
            infer_files[0].split("/")[-1].replace('.wav', '.npy'),
            prediction_list)

        # Final MIR Score
        prediction_list[:, 0] += 0.025
        pre, rec, f1, _ = (mir_eval.transcription.precision_recall_f1_overlap(
            np.concatenate([label_raw[:, 0:1], label_raw[:, 0:1] + 1], axis=1),
            pretty_midi.note_number_to_hz(label_raw[:, 1]),
            np.concatenate(
                [prediction_list[:, 0:1] - 0.030, prediction_list[:, 0:1] + 1],
                axis=1),
            pretty_midi.note_number_to_hz(prediction_list[:, 1]),
            offset_ratio=None))

        f1_list.append(f1)
        pre_list.append(pre)
        rec_list.append(rec)

        print(f1, pre, rec)
        print(np.mean(f1_list), np.mean(pre_list), np.mean(rec_list), idx_file)
        print('---------------------------')
    else:
        print("<<< Already exists!")
Example #28
def _calculate_metrics_py(frame_predictions, onset_predictions,
                          offset_predictions, velocity_values,
                          sequence_label_str, frame_labels, sequence_id,
                          hparams):
    """Python logic for calculating metrics on a single example."""
    tf.logging.info('Calculating metrics for %s with length %d', sequence_id,
                    frame_labels.shape[0])
    if not hparams.predict_onset_threshold:
        onset_predictions = None
    if not hparams.predict_offset_threshold:
        offset_predictions = None

    sequence_prediction = sequences_lib.pianoroll_to_note_sequence(
        frames=frame_predictions,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_duration_ms=0,
        min_midi_pitch=constants.MIN_MIDI_PITCH,
        onset_predictions=onset_predictions,
        offset_predictions=offset_predictions,
        velocity_values=velocity_values)

    sequence_label = music_pb2.NoteSequence.FromString(sequence_label_str)

    if hparams.backward_shift_amount_ms:

        def shift_notesequence(ns_time):
            return ns_time + hparams.backward_shift_amount_ms / 1000.

        shifted_sequence_label, skipped_notes = (
            sequences_lib.adjust_notesequence_times(sequence_label,
                                                    shift_notesequence))
        assert skipped_notes == 0
        sequence_label = shifted_sequence_label

    est_intervals, est_pitches, est_velocities = (
        infer_util.sequence_to_valued_intervals(sequence_prediction))

    ref_intervals, ref_pitches, ref_velocities = (
        infer_util.sequence_to_valued_intervals(sequence_label))

    note_precision, note_recall, note_f1, _ = (
        mir_eval.transcription.precision_recall_f1_overlap(
            ref_intervals,
            pretty_midi.note_number_to_hz(ref_pitches),
            est_intervals,
            pretty_midi.note_number_to_hz(est_pitches),
            offset_ratio=None))

    (note_with_offsets_precision, note_with_offsets_recall,
     note_with_offsets_f1,
     _) = (mir_eval.transcription.precision_recall_f1_overlap(
         ref_intervals, pretty_midi.note_number_to_hz(ref_pitches),
         est_intervals, pretty_midi.note_number_to_hz(est_pitches)))

    (note_with_offsets_velocity_precision, note_with_offsets_velocity_recall,
     note_with_offsets_velocity_f1,
     _) = (mir_eval.transcription_velocity.precision_recall_f1_overlap(
         ref_intervals=ref_intervals,
         ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
         ref_velocities=ref_velocities,
         est_intervals=est_intervals,
         est_pitches=pretty_midi.note_number_to_hz(est_pitches),
         est_velocities=est_velocities))

    processed_frame_predictions = sequences_lib.sequence_to_pianoroll(
        sequence_prediction,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_pitch=constants.MIN_MIDI_PITCH,
        max_pitch=constants.MAX_MIDI_PITCH).active

    if processed_frame_predictions.shape[0] < frame_labels.shape[0]:
        # Pad transcribed frames with silence.
        pad_length = frame_labels.shape[0] - processed_frame_predictions.shape[0]
        processed_frame_predictions = np.pad(processed_frame_predictions,
                                             [(0, pad_length), (0, 0)],
                                             'constant')
    elif processed_frame_predictions.shape[0] > frame_labels.shape[0]:
        # Truncate transcribed frames.
        processed_frame_predictions = (
            processed_frame_predictions[:frame_labels.shape[0], :])

    tf.logging.info(
        'Metrics for %s: Note F1 %f, Note w/ offsets F1 %f, '
        'Note w/ offsets & velocity: %f', sequence_id, note_f1,
        note_with_offsets_f1, note_with_offsets_velocity_f1)
    return (note_precision, note_recall, note_f1, note_with_offsets_precision,
            note_with_offsets_recall, note_with_offsets_f1,
            note_with_offsets_velocity_precision,
            note_with_offsets_velocity_recall, note_with_offsets_velocity_f1,
            processed_frame_predictions)
Example #29
def score_sequence(session, global_step_increment, summary_op, summary_writer,
                   metrics_to_updates, metric_note_precision,
                   metric_note_recall, metric_note_f1,
                   metric_note_precision_with_offsets,
                   metric_note_recall_with_offsets,
                   metric_note_f1_with_offsets, metric_frame_labels,
                   metric_frame_predictions, frame_labels, sequence_prediction,
                   frames_per_second, note_sequence_str_label, min_duration_ms,
                   sequence_id):
  """Calculate metrics on the inferred sequence."""
  est_intervals, est_pitches = sequence_to_valued_intervals(
      sequence_prediction,
      min_duration_ms=min_duration_ms)

  sequence_label = music_pb2.NoteSequence.FromString(note_sequence_str_label)
  ref_intervals, ref_pitches = sequence_to_valued_intervals(
      sequence_label,
      min_duration_ms=min_duration_ms)

  sequence_note_precision, sequence_note_recall, sequence_note_f1, _ = (
      mir_eval.transcription.precision_recall_f1_overlap(
          ref_intervals,
          pretty_midi.note_number_to_hz(ref_pitches),
          est_intervals,
          pretty_midi.note_number_to_hz(est_pitches),
          offset_ratio=None))

  (sequence_note_precision_with_offsets,
   sequence_note_recall_with_offsets,
   sequence_note_f1_with_offsets, _) = (
       mir_eval.transcription.precision_recall_f1_overlap(
           ref_intervals,
           pretty_midi.note_number_to_hz(ref_pitches),
           est_intervals,
           pretty_midi.note_number_to_hz(est_pitches)))

  frame_predictions, _, _ = data.sequence_to_pianoroll(
      sequence_prediction,
      frames_per_second=frames_per_second,
      min_pitch=constants.MIN_MIDI_PITCH,
      max_pitch=constants.MAX_MIDI_PITCH)

  if frame_predictions.shape[0] < frame_labels.shape[0]:
    # Pad transcribed frames with silence.
    pad_length = frame_labels.shape[0] - frame_predictions.shape[0]
    frame_predictions = np.pad(
        frame_predictions, [(0, pad_length), (0, 0)], 'constant')
  elif frame_predictions.shape[0] > frame_labels.shape[0]:
    # Truncate transcribed frames.
    frame_predictions = frame_predictions[:frame_labels.shape[0], :]

  global_step, _ = session.run([global_step_increment, metrics_to_updates], {
      metric_frame_predictions: frame_predictions,
      metric_frame_labels: frame_labels,
      metric_note_precision: sequence_note_precision,
      metric_note_recall: sequence_note_recall,
      metric_note_f1: sequence_note_f1,
      metric_note_precision_with_offsets: sequence_note_precision_with_offsets,
      metric_note_recall_with_offsets: sequence_note_recall_with_offsets,
      metric_note_f1_with_offsets: sequence_note_f1_with_offsets
  })
  # Running the summary op separately ensures that all of the metrics have been
  # updated before we try to query them.
  summary = session.run(summary_op)

  tf.logging.info(
      'Writing score summary for %s: Step= %d, Note F1=%f',
      sequence_id, global_step, sequence_note_f1)
  summary_writer.add_summary(summary, global_step)
  summary_writer.flush()

  return sequence_label
Example #30
            t += INCREMENT
        j += 1

    if diff in limits:
        name = "%d-%d-chords" % (prev, diff)

        piano_midi.instruments.append(
            piano)  # Append the piano instrument to the midi file
        piano_midi.write("%s.mid" % name)

        file = open("%s.txt" % name,
                    "w")  # Text file with the chord information
        first = True
        for note in piano_midi.instruments[0].notes:  # Going through the notes
            file.write(
                "%s " %
                pretty_midi.note_number_to_hz(note.pitch))  # Note frequency
            if not first:
                file.write("%s " % note.start)  # Note onset
                file.write("%s\n" % note.end)  # Note offset
                first = True
            else:
                first = False
        file.close()

        if limits[-1] != diff:
            prev = diff + 1
            piano_midi = pretty_midi.PrettyMIDI()
            piano = pretty_midi.Instrument(program=piano_program)
            t = INITIAL
Example #31
def unpack_sample(name='', concat=False):
    if name == '':
        pathlist = list(pathlib.Path('Samples').glob('**/*.npy'))
        name = str(pathlist[-1])
    if not os.path.exists(name):
        os.mkdir(name)
    savename = name + '/' + name.split('/')[-1]
    if not '.npy' in name:
        name = name + '.npy'
    samples = np.load(name) > 0
    program_nums = [0, 24, 40, 56, 64, 72]
    is_drum = [False] * CHANNEL_NUM
    if concat:
        sample = np.concatenate([i for i in samples], axis=-1)
        write_piano_rolls_to_midi(sample, program_nums=program_nums, is_drum=is_drum, filename=savename + '.mid')
        tqdm.write(savename + '.mid')  # log the file actually written above
        for i, piano_roll in enumerate(sample):
            fig = plt.figure(figsize=(12, 4))
            librosa.display.specshow(piano_roll, x_axis='time', y_axis='cqt_note', hop_length=1, sr=96, fmin=pm.note_number_to_hz(12))
            plt.title(savename + '_' + pm.program_to_instrument_name(program_nums[i]))
            fig.savefig(savename + '_' + str(i) + '.png')
            plt.close(fig)
        return
    for id, sample in enumerate(samples):
        write_piano_rolls_to_midi(sample, program_nums=program_nums, is_drum=is_drum, filename=savename + '_' + str(id) + '.mid')
        tqdm.write(savename + '_' + str(id) + '.mid')
        for i, piano_roll in enumerate(sample):
            fig = plt.figure(figsize=(12, 4))
            librosa.display.specshow(piano_roll, x_axis='time', y_axis='cqt_note', hop_length=1, sr=96, fmin=pm.note_number_to_hz(12))
            plt.title(savename + '_' + pm.program_to_instrument_name(program_nums[i]))
            fig.savefig(savename + '_' + str(id) + '_' + str(i) + '.png')
            plt.close(fig)