Пример #1
0
def evaluate(model, batch, device):
    """Compute loss, frame-level and note-level metrics for one batch.

    Args:
        model: Callable mapping ``batch['audio']`` to a
            ``(frame_logit, onset_logit)`` pair.
        batch: Mapping with at least the keys ``'audio'``, ``'frame'`` and
            ``'onset'``. It is passed through ``allocate_batch`` before use.
        device: Device forwarded to ``allocate_batch``.

    Returns:
        A ``defaultdict(list)`` keyed by ``'metric/<category>/<name>'``.
        The two loss entries receive one value per call; every other entry
        receives one value per item of the batch.
    """
    metrics = defaultdict(list)
    batch = allocate_batch(batch, device)

    frame_logit, onset_logit = model(batch['audio'])

    criterion = nn.BCEWithLogitsLoss()
    frame_loss = criterion(frame_logit, batch['frame'])
    # The onset loss must be computed from the onset head. Using the frame
    # logits here would report a frame-vs-onset-label loss instead.
    onset_loss = criterion(onset_logit, batch['onset'])
    metrics['metric/loss/frame_loss'].append(frame_loss.cpu().numpy())
    metrics['metric/loss/onset_loss'].append(onset_loss.cpu().numpy())

    # Loop-invariant factor applied to the note intervals below
    # (presumably frame index -> seconds; confirm against extract_notes).
    scaling = HOP_SIZE / SAMPLE_RATE

    for n in range(batch['audio'].shape[0]):
        frame_pred = th.sigmoid(frame_logit[n])
        onset_pred = th.sigmoid(onset_logit[n])

        pr, re, f1 = framewise_eval(frame_pred, batch['frame'][n])
        metrics['metric/frame/frame_precision'].append(pr)
        metrics['metric/frame/frame_recall'].append(re)
        metrics['metric/frame/frame_f1'].append(f1)

        pr, re, f1 = framewise_eval(onset_pred, batch['onset'][n])
        metrics['metric/frame/onset_precision'].append(pr)
        metrics['metric/frame/onset_recall'].append(re)
        metrics['metric/frame/onset_f1'].append(f1)

        p_est, i_est = extract_notes(onset_pred, frame_pred)
        p_ref, i_ref = extract_notes(batch['onset'][n], batch['frame'][n])

        i_ref = (i_ref * scaling).reshape(-1, 2)
        p_ref = np.array([midi_to_hz(MIN_MIDI + pitch) for pitch in p_ref])
        i_est = (i_est * scaling).reshape(-1, 2)
        p_est = np.array([midi_to_hz(MIN_MIDI + pitch) for pitch in p_est])

        # Scores obtained with offset_ratio=None are stored under
        # 'metric/note/*'.
        p, r, f, o = evaluate_notes(i_ref,
                                    p_ref,
                                    i_est,
                                    p_est,
                                    offset_ratio=None)
        metrics['metric/note/precision'].append(p)
        metrics['metric/note/recall'].append(r)
        metrics['metric/note/f1'].append(f)
        metrics['metric/note/overlap'].append(o)

        # Scores obtained with the default offset_ratio are stored under
        # 'metric/note-with-offsets/*'.
        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
        metrics['metric/note-with-offsets/precision'].append(p)
        metrics['metric/note-with-offsets/recall'].append(r)
        metrics['metric/note-with-offsets/f1'].append(f)
        metrics['metric/note-with-offsets/overlap'].append(o)

    return metrics
Пример #2
0
def train(model_type,
          logdir,
          batch_size,
          iterations,
          validation_interval,
          sequence_length,
          learning_rate,
          weight_decay,
          cnn_unit,
          fc_unit,
          debug=False,
          save_midi=False):
    """Train a transcription model, validate periodically, then test it.

    Args:
        model_type: One of ``'baseline'``, ``'rnn'``, ``'crnn'`` or
            ``'ONF'``; selects which model class is instantiated.
        logdir: Output directory. When ``None`` a timestamped directory
            under ``runs/`` is used. It is created if missing.
        batch_size: Batch size for the training and validation loaders.
        iterations: Number of optimisation steps.
        validation_interval: Validation runs every this many steps.
        sequence_length: Forwarded to the train/validation datasets after
            being rounded down to a multiple of ``HOP_SIZE``.
        learning_rate: Learning rate passed to Adam.
        weight_decay: Weight decay passed to Adam.
        cnn_unit: Forwarded to the model constructor and stored in the
            checkpoint.
        fc_unit: Forwarded to the model constructor and stored in the
            checkpoint.
        debug: When true, the ``'debug'`` group is used for both training
            and validation, and ``iterations`` / ``validation_interval``
            are overridden with 100 / 10.
        save_midi: Forwarded as ``save`` to ``evaluate`` on the test set.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.

    Side effects:
        Writes ``model-<step>.pt`` and ``results.txt`` into ``logdir`` and
        prints progress and metrics to stdout.
    """
    if logdir is None:
        logdir = Path('runs') / ('exp_' +
                                 datetime.now().strftime('%y%m%d-%H%M%S'))
    Path(logdir).mkdir(parents=True, exist_ok=True)

    if sequence_length % HOP_SIZE != 0:
        adj_length = sequence_length // HOP_SIZE * HOP_SIZE
        print(
            f'sequence_length: {sequence_length} is not divide by {HOP_SIZE}.\n \
                adjusted into : {adj_length}')
        sequence_length = adj_length

    if debug:
        dataset = MAESTRO_small(groups=['debug'],
                                sequence_length=sequence_length,
                                hop_size=HOP_SIZE,
                                random_sample=True)
        valid_dataset = dataset
        iterations = 100
        validation_interval = 10
    else:
        dataset = MAESTRO_small(groups=['train'],
                                sequence_length=sequence_length,
                                hop_size=HOP_SIZE,
                                random_sample=True)
        valid_dataset = MAESTRO_small(groups=['validation'],
                                      sequence_length=sequence_length,
                                      hop_size=HOP_SIZE,
                                      random_sample=False)
    train_loader = DataLoader(dataset, batch_size, shuffle=True)

    device = th.device('cuda') if th.cuda.is_available() else th.device('cpu')

    if model_type == 'baseline':
        model = Transcriber(cnn_unit=cnn_unit, fc_unit=fc_unit)
    elif model_type == 'rnn':
        model = Transcriber_RNN(cnn_unit=cnn_unit, fc_unit=fc_unit)
    elif model_type == 'crnn':
        model = Transcriber_CRNN(cnn_unit=cnn_unit, fc_unit=fc_unit)
    elif model_type == 'ONF':
        model = Transcriber_ONF(cnn_unit=cnn_unit, fc_unit=fc_unit)
    else:
        # Without this branch `model` stays unbound and the failure only
        # shows up as an UnboundLocalError at the optimizer line below.
        raise ValueError(f'unknown model_type: {model_type!r}')
    optimizer = th.optim.Adam(model.parameters(),
                              learning_rate,
                              weight_decay=weight_decay)
    scheduler = StepLR(optimizer, step_size=1000, gamma=0.98)
    criterion = nn.BCEWithLogitsLoss()

    model = model.to(device)

    loop = tqdm(range(1, iterations + 1))

    # Keeps `step` defined for the checkpoint below even when the loop
    # body never runs (iterations < 1).
    step = 0
    for step, batch in zip(loop, cycle(train_loader)):
        optimizer.zero_grad()
        batch = allocate_batch(batch, device)

        frame_logit, onset_logit = model(batch['audio'])
        frame_loss = criterion(frame_logit, batch['frame'])
        onset_loss = criterion(onset_logit, batch['onset'])
        loss = onset_loss + frame_loss

        loss.mean().backward()

        # Each parameter tensor's gradient norm is clipped to 3.0 on its
        # own; this is not a single global-norm clip.
        for parameter in model.parameters():
            clip_grad_norm_([parameter], 3.0)

        optimizer.step()
        scheduler.step()
        loop.set_postfix_str("loss: {:.3e}".format(loss.mean()))

        if step % validation_interval == 0:
            model.eval()
            with th.no_grad():
                # Separate names so the training loader and the current
                # training batch are not rebound by validation.
                valid_loader = DataLoader(valid_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
                metrics = defaultdict(list)
                for valid_batch in valid_loader:
                    batch_results = evaluate(model, valid_batch, device)

                    for key, value in batch_results.items():
                        metrics[key].extend(value)
            print('')
            for key, value in metrics.items():
                if key[-2:] == 'f1' or 'loss' in key:
                    print(f'{key:27} : {np.mean(value):.4f}')
            model.train()

    th.save(
        {
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'step': step,
            'cnn_unit': cnn_unit,
            'fc_unit': fc_unit
        },
        Path(logdir) / f'model-{step}.pt')
    del dataset, valid_dataset

    # The test dataset is built without a sequence_length argument and is
    # evaluated one item per batch.
    test_dataset = MAESTRO_small(groups=['test'],
                                 hop_size=HOP_SIZE,
                                 random_sample=False)
    model.eval()
    with th.no_grad():
        test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
        metrics = defaultdict(list)
        for test_batch in test_loader:
            # NOTE: requires an `evaluate` that accepts `save`/`save_path`.
            batch_results = evaluate(model,
                                     test_batch,
                                     device,
                                     save=save_midi,
                                     save_path=logdir)
            for key, value in batch_results.items():
                metrics[key].extend(value)
    print('')
    for key, value in metrics.items():
        if key[-2:] == 'f1' or 'loss' in key:
            print(f'{key} : {np.mean(value)}')

    with open(Path(logdir) / 'results.txt', 'w') as f:
        for key, values in metrics.items():
            # Keys have the form 'metric/<category>/<name>'.
            _, category, name = key.split('/')
            metric_string = f'{category:>32} {name:26}: {np.mean(values):.3f} +- {np.std(values):.3f}'
            print(metric_string)
            f.write(metric_string + '\n')
Пример #3
0
def evaluate(model, batch, device, save=False, save_path=None):
    """Compute metrics for one batch and optionally export the predictions.

    Args:
        model: Callable mapping ``batch['audio']`` to a
            ``(frame_logit, onset_logit)`` pair.
        batch: Mapping with at least the keys ``'audio'``, ``'frame'`` and
            ``'onset'``; ``'path'`` is also read when ``save`` is true.
            It is passed through ``allocate_batch`` before use.
        device: Device forwarded to ``allocate_batch``.
        save: When true, each item's estimated notes are written as a
            ``.midi`` file plus a synthesised ``.wav`` file.
        save_path: Directory receiving the exported files; must be set
            when ``save`` is true.

    Returns:
        A ``defaultdict(list)`` keyed by ``'metric/<category>/<name>'``.
        The two loss entries receive one value per call; every other entry
        receives one value per item of the batch.
    """
    metrics = defaultdict(list)
    batch = allocate_batch(batch, device)

    frame_logit, onset_logit = model(batch['audio'])

    criterion = nn.BCEWithLogitsLoss()
    frame_loss = criterion(frame_logit, batch['frame'])
    # The onset loss must be computed from the onset head. Using the frame
    # logits here would report a frame-vs-onset-label loss instead.
    onset_loss = criterion(onset_logit, batch['onset'])
    metrics['metric/loss/frame_loss'].append(frame_loss.cpu().numpy())
    metrics['metric/loss/onset_loss'].append(onset_loss.cpu().numpy())

    # Loop-invariant factor applied to the note intervals below
    # (presumably frame index -> seconds; confirm against extract_notes).
    scaling = HOP_SIZE / SAMPLE_RATE
    # Sample rate used for both synthesis and the written wav file.
    synth_rate = 16000

    for n in range(batch['audio'].shape[0]):
        frame_pred = th.sigmoid(frame_logit[n])
        onset_pred = th.sigmoid(onset_logit[n])

        pr, re, f1 = framewise_eval(frame_pred, batch['frame'][n])
        metrics['metric/frame/frame_precision'].append(pr)
        metrics['metric/frame/frame_recall'].append(re)
        metrics['metric/frame/frame_f1'].append(f1)

        pr, re, f1 = framewise_eval(onset_pred, batch['onset'][n])
        metrics['metric/frame/onset_precision'].append(pr)
        metrics['metric/frame/onset_recall'].append(re)
        metrics['metric/frame/onset_f1'].append(f1)

        p_est, i_est = extract_notes(onset_pred, frame_pred)
        p_ref, i_ref = extract_notes(batch['onset'][n], batch['frame'][n])

        i_ref = (i_ref * scaling).reshape(-1, 2)
        p_ref = np.array([midi_to_hz(MIN_MIDI + pitch) for pitch in p_ref])
        i_est = (i_est * scaling).reshape(-1, 2)
        p_est = np.array([midi_to_hz(MIN_MIDI + pitch) for pitch in p_est])

        # Scores obtained with offset_ratio=None are stored under
        # 'metric/note/*'.
        p, r, f, o = evaluate_notes(i_ref,
                                    p_ref,
                                    i_est,
                                    p_est,
                                    offset_ratio=None)
        metrics['metric/note/precision'].append(p)
        metrics['metric/note/recall'].append(r)
        metrics['metric/note/f1'].append(f)
        metrics['metric/note/overlap'].append(o)

        # Scores obtained with the default offset_ratio are stored under
        # 'metric/note-with-offsets/*'.
        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
        metrics['metric/note-with-offsets/precision'].append(p)
        metrics['metric/note-with-offsets/recall'].append(r)
        metrics['metric/note-with-offsets/f1'].append(f)
        metrics['metric/note-with-offsets/overlap'].append(o)

        if not save:
            continue

        stem = Path(batch['path'][n]).stem
        if len(p_est) == 0:
            # Actually skip the export, as the message announces. Before,
            # an empty MIDI file was still written and synthesised.
            print(
                f'no onset detected. skip: {Path(batch["path"][n]).stem}')
            continue

        midi_filename = Path(save_path) / (stem + '.midi')
        # Every exported note gets the same fixed value of 64 as the last
        # argument (presumably its velocity; confirm against save_midi).
        save_midi(midi_filename, p_est, i_est, [64] * len(p_est))

        wav_filename = Path(save_path) / (stem + '.wav')
        midi_file = pretty_midi.PrettyMIDI(str(midi_filename))
        synth_audio = midi_file.fluidsynth(fs=synth_rate)
        soundfile.write(wav_filename, synth_audio, synth_rate)

    return metrics
Пример #4
0
def train(model_type,
          logdir,
          batch_size,
          iterations,
          validation_interval,
          sequence_length,
          learning_rate,
          weight_decay,
          cnn_unit,
          fc_unit,
          debug=False,
          save_midi=False,
          n_train=1):
    """Train a transcription model, validate periodically, then test it.

    Training can be stopped early with Ctrl+C; the checkpoint is still
    written and the test-set evaluation still runs afterwards.

    Args:
        model_type: One of ``'baseline'``, ``'rnn'``, ``'crnn'``, ``'ONF'``
            or ``'udrnn'``; selects which model class is instantiated.
        logdir: Output directory. When ``None`` a directory named from the
            current timestamp and ``model_type`` is used under ``runs/``.
            It is created if missing.
        batch_size: Batch size for the training and validation loaders.
        iterations: Number of optimisation steps.
        validation_interval: Validation runs every this many steps.
        sequence_length: Forwarded to the train/validation datasets after
            being rounded down to a multiple of ``HOP_SIZE``.
        learning_rate: Learning rate passed to Adam.
        weight_decay: Weight decay passed to Adam.
        cnn_unit: Forwarded to the model constructor and stored in the
            checkpoint.
        fc_unit: Forwarded to the model constructor and stored in the
            checkpoint.
        debug: When true, the ``'debug'`` group is used for both training
            and validation, and ``iterations`` / ``validation_interval``
            are overridden with 100 / 10.
        save_midi: Forwarded as ``save`` to ``evaluate`` on the test set.
        n_train: Accepted but not used by this function.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.

    Side effects:
        Writes ``model-<step>.pt`` into ``logdir``, appends validation and
        test metrics to ``results.txt`` there, and prints to stdout.
    """
    # Resolve and create the log directory.
    if logdir is None:
        logdir = Path('runs') / ('exp_' +
                                 datetime.now().strftime('%y%m%d-%H%M%S') +
                                 '_' + model_type)
    Path(logdir).mkdir(parents=True, exist_ok=True)

    # Round the sequence length down to a multiple of HOP_SIZE (presumably
    # so each excerpt covers a whole number of frames; confirm against the
    # dataset implementation).
    if sequence_length % HOP_SIZE != 0:
        adj_length = sequence_length // HOP_SIZE * HOP_SIZE
        print(
            f'sequence_length: {sequence_length} is not divide by {HOP_SIZE}.\n \
                adjusted into : {adj_length}')
        sequence_length = adj_length

    # Datasets: debug mode reuses one small group for train and validation.
    if debug:
        dataset = MAESTRO_small(groups=['debug'],
                                sequence_length=sequence_length,
                                hop_size=HOP_SIZE,
                                random_sample=True)
        valid_dataset = dataset
        iterations = 100
        validation_interval = 10
    else:
        dataset = MAESTRO_small(groups=['train'],
                                sequence_length=sequence_length,
                                hop_size=HOP_SIZE,
                                random_sample=True)
        valid_dataset = MAESTRO_small(groups=['validation'],
                                      sequence_length=sequence_length,
                                      hop_size=HOP_SIZE,
                                      random_sample=False)
    train_loader = DataLoader(dataset, batch_size, shuffle=True)

    # Device selection with CPU fallback.
    device = th.device('cuda') if th.cuda.is_available() else th.device('cpu')
    if device.type == 'cuda':
        # th.cuda.current_device() raises on hosts without CUDA, so the
        # diagnostic is only printed when a GPU was actually selected.
        print(th.cuda.device_count(), th.cuda.current_device())

    # Model selection.
    if model_type == 'baseline':
        model = Transcriber(cnn_unit=cnn_unit, fc_unit=fc_unit)
    elif model_type == 'rnn':
        model = Transcriber_RNN(cnn_unit=cnn_unit, fc_unit=fc_unit)
    elif model_type == 'crnn':
        model = Transcriber_CRNN(cnn_unit=cnn_unit, fc_unit=fc_unit)
    elif model_type == 'ONF':
        model = Transcriber_ONF(cnn_unit=cnn_unit, fc_unit=fc_unit)
    elif model_type == 'udrnn':
        model = Transcriber_udRNN(cnn_unit=cnn_unit, fc_unit=fc_unit)
    else:
        # Without this branch `model` stays unbound and the failure only
        # shows up as an UnboundLocalError at the optimizer line below.
        raise ValueError(f'unknown model_type: {model_type!r}')
    optimizer = th.optim.Adam(model.parameters(),
                              learning_rate,
                              weight_decay=weight_decay)
    scheduler = StepLR(optimizer, step_size=1000, gamma=0.98)
    criterion = nn.BCEWithLogitsLoss()

    model = model.to(device)

    # Training length is set by `iterations`, not by dataset size: the
    # step counter is zipped with cycle(train_loader), which presumably
    # restarts the loader whenever it is exhausted.
    loop = tqdm(range(1, iterations + 1))

    # Keeps `step` defined for the checkpoint below when training is
    # interrupted before the first step or `iterations` is < 1.
    step = 0
    try:
        for step, batch in zip(loop, cycle(train_loader)):
            optimizer.zero_grad()
            batch = allocate_batch(batch, device)

            # Audio in, frame and onset logits out; both heads are trained
            # with the same binary cross-entropy criterion.
            frame_logit, onset_logit = model(batch['audio'])
            frame_loss = criterion(frame_logit, batch['frame'])
            onset_loss = criterion(onset_logit, batch['onset'])
            loss = onset_loss + frame_loss

            loss.mean().backward()

            # Each parameter tensor's gradient norm is clipped to 3.0 on
            # its own; this is not a single global-norm clip.
            for parameter in model.parameters():
                clip_grad_norm_([parameter], 3.0)

            optimizer.step()
            scheduler.step()
            loop.set_postfix_str("loss: {:.3e}".format(loss.mean()))

            if step % validation_interval == 0:
                model.eval()
                with th.no_grad():
                    # Separate names so the training loader and the
                    # current training batch are not rebound here.
                    valid_loader = DataLoader(valid_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)
                    metrics = defaultdict(list)
                    for valid_batch in valid_loader:
                        batch_results = evaluate(model, valid_batch, device)

                        for key, value in batch_results.items():
                            metrics[key].extend(value)
                print('')
                with open(Path(logdir) / 'results.txt', 'a+') as f:
                    for key, value in metrics.items():
                        if key[-2:] == 'f1' or 'loss' in key:
                            eval_string = f'{key:27} : {np.mean(value):.4f}'
                            print(eval_string)
                            f.write(eval_string + '\n')
                    f.write('\n')
                model.train()
    except KeyboardInterrupt:  # Ctrl+C stops training early
        with open(Path(logdir) / 'results.txt', 'a+') as f:
            dashes = '-' * 100
            print(dashes)
            f.write(dashes + '\n')
            early_log = 'Exiting from training early'
            print(early_log)
            f.write(early_log + '\n')

    # Checkpoint the model, then release the training datasets.
    th.save(
        {
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'step': step,
            'cnn_unit': cnn_unit,
            'fc_unit': fc_unit
        },
        Path(logdir) / f'model-{step}.pt')
    del dataset, valid_dataset

    # The test dataset is built without a sequence_length argument and is
    # evaluated one item per batch.
    test_dataset = MAESTRO_small(groups=['test'],
                                 hop_size=HOP_SIZE,
                                 random_sample=False)
    model.eval()
    with th.no_grad():
        test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
        metrics = defaultdict(list)
        for test_batch in test_loader:
            batch_results = evaluate(model,
                                     test_batch,
                                     device,
                                     save=save_midi,
                                     save_path=logdir)
            for key, value in batch_results.items():
                metrics[key].extend(value)
    print('')
    for key, value in metrics.items():
        if key[-2:] == 'f1' or 'loss' in key:
            print(f'{key} : {np.mean(value)}')

    with open(Path(logdir) / 'results.txt', 'a+') as f:
        for key, values in metrics.items():
            # Keys have the form 'metric/<category>/<name>'.
            _, category, name = key.split('/')
            metric_string = f'{category:>32} {name:26}: {np.mean(values):.3f} +- {np.std(values):.3f}'
            print(metric_string)
            f.write(metric_string + '\n')