Example #1
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs.
        first input: task_id - (optional) To choose the system configuration in parameters.py.
                                (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely identified with this.
                              (default) 1

    """
    if len(argv) != 3:
        print('\n\n')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('The code expects two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print(
            '\t\t<task-id> is used to choose the user-defined parameter set from parameter.py'
        )
        print(
            '\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
            'You can use any number or string for this.')
        print('Using default inputs for now')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        test_splits = [1, 2, 3, 4]
        val_splits = [2, 3, 4, 1]
        train_splits = [[3, 4], [4, 1], [1, 2], [2, 3]]
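        # Fold k is tested on test_splits[k], validated on val_splits[k],
        # and trained on the remaining two splits in train_splits[k].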

        # SUGGESTION: Considering the long training time, major tuning of the method can be done on the first split.
        # Once you finalize the method, you can evaluate its performance on the complete cross-validation splits
        # test_splits = [1]
        # val_splits = [2]
        # train_splits = [[3, 4]]

    elif params['mode'] == 'eval':
        test_splits = [0]
        val_splits = [1]
        train_splits = [[2, 3, 4]]

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print(
            '\n\n---------------------------------------------------------------------------------------------------'
        )
        print(
            '------------------------------------      SPLIT {}   -----------------------------------------------'
            .format(split))
        print(
            '---------------------------------------------------------------------------------------------------'
        )

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id,
                                                   params['dataset'],
                                                   params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=train_splits[split_cnt],
            batch_size=params['batch_size'],
            seq_len=params['sequence_length'],
            feat_label_dir=params['feat_label_dir'])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=val_splits[split_cnt],
            batch_size=params['batch_size'],
            seq_len=params['sequence_length'],
            feat_label_dir=params['feat_label_dir'],
            shuffle=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(
            data_in, data_out))

        gt = collect_test_labels(data_gen_val, data_out, params['quick_test'])
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
        doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

        # rescaling the reference elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        nb_classes = data_gen_train.get_nb_classes()
        def_elevation = data_gen_train.get_default_elevation()
        doa_gt[:, nb_classes:] /= (180. / def_elevation)
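        # Illustrative numbers: with def_elevation = 50, a reference elevation
        # of 90 degrees becomes 90 / (180. / 50) = 25 after rescaling.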

        print(
            'MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, pool_size: {}\n\trnn_size: {}, fnn_size: {}\n'
            .format(params['dropout_rate'], params['nb_cnn2d_filt'],
                    params['pool_size'], params['rnn_size'],
                    params['fnn_size']))

        model = keras_model.get_model(data_in=data_in,
                                      data_out=data_out,
                                      dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                      pool_size=params['pool_size'],
                                      rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'])
        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        seld_metric = np.zeros(params['nb_epochs'])
        tr_loss = np.zeros(params['nb_epochs'])
        val_loss = np.zeros(params['nb_epochs'])
        doa_metric = np.zeros((params['nb_epochs'], 6))
        sed_metric = np.zeros((params['nb_epochs'], 2))
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else
                data_gen_train.get_total_batches_in_data(),
                validation_data=data_gen_val.generate(),
                validation_steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2)
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
            val_loss[epoch_cnt] = hist.history.get('val_loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                verbose=2)

            # Calculate the metrics
            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])

            # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
            doa_pred[:, nb_classes:] /= (180. / def_elevation)

            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_val.nb_frames_1s())
            doa_metric[epoch_cnt, :] = evaluation_metrics.compute_doa_scores_regr(
                doa_pred, doa_gt, sed_pred, sed_gt)
            seld_metric[epoch_cnt] = evaluation_metrics.compute_seld_metric(
                sed_metric[epoch_cnt, :], doa_metric[epoch_cnt, :])
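            # compute_seld_metric is assumed to average the four component
            # errors, roughly mean([ER, 1 - F, DOA_err / 180, 1 - frame_recall]),
            # as in the DCASE2019 baseline.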

            # Visualize the metrics with respect to epochs
            plot_functions(unique_name, tr_loss, val_loss, sed_metric,
                           doa_metric, seld_metric)

            patience_cnt += 1
            if seld_metric[epoch_cnt] < best_seld_metric:
                best_seld_metric = seld_metric[epoch_cnt]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: %d, time: %.2fs, tr_loss: %.2f, val_loss: %.2f, '
                'ER_overall: %.2f, F1_overall: %.2f, '
                'doa_error_pred: %.2f, good_pks_ratio:%.2f, '
                'seld_score: %.2f, best_seld_score: %.2f, best_epoch : %d\n' %
                (epoch_cnt, time.time() - start, tr_loss[epoch_cnt],
                 val_loss[epoch_cnt], sed_metric[epoch_cnt, 0],
                 sed_metric[epoch_cnt, 1], doa_metric[epoch_cnt, 0],
                 doa_metric[epoch_cnt, 1], seld_metric[epoch_cnt],
                 best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break

        avg_scores_val.append([
            sed_metric[best_epoch, 0], sed_metric[best_epoch, 1],
            doa_metric[best_epoch, 0], doa_metric[best_epoch, 1],
            best_seld_metric
        ])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score: {}'.format(best_seld_metric))
        print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(
            doa_metric[best_epoch, 0], doa_metric[best_epoch, 1]))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
            sed_metric[best_epoch, 0], sed_metric[best_epoch, 1]))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print('Loading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=split,
            batch_size=params['batch_size'],
            seq_len=params['sequence_length'],
            feat_label_dir=params['feat_label_dir'],
            shuffle=False,
            per_file=params['dcase_output'],
            is_eval=(params['mode'] == 'eval'))

        print(
            '\nLoading the best model and predicting results on the testing split'
        )
        model = load_model('{}_model.h5'.format(unique_name))
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            verbose=2)

        test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
        test_doa_pred = evaluation_metrics.reshape_3Dto2D(pred_test[1])

        # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        test_doa_pred[:, nb_classes:] /= (180. / def_elevation)

        if params['dcase_output']:
            # Dump results in DCASE output format for calculating final scores
            dcase_dump_folder = os.path.join(
                params['dcase_dir'],
                '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
            cls_feature_class.create_folder(dcase_dump_folder)
            print('Dumping recording-wise results in: {}'.format(
                dcase_dump_folder))

            test_filelist = data_gen_test.get_filelist()
            # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
            max_frames_with_content = data_gen_test.get_nb_frames()

            # Number of frames in one batch (batch_size* sequence_length) consists of all the 3000 frames above with
            # zero padding in the remaining frames
            frames_per_file = data_gen_test.get_frame_per_file()
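            # Illustrative numbers (not from this config): with batch_size = 16
            # and sequence_length = 256, frames_per_file = 4096; the first 3000
            # frames carry content and the remaining 1096 are zero padding.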

            for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
                output_file = os.path.join(
                    dcase_dump_folder,
                    test_filelist[file_cnt].replace('.npy', '.csv'))
                dc = file_cnt * frames_per_file
                output_dict = evaluation_metrics.regression_label_format_to_output_format(
                    data_gen_test,
                    test_sed_pred[dc:dc + max_frames_with_content, :],
                    test_doa_pred[dc:dc + max_frames_with_content, :] * 180 /
                    np.pi)
                evaluation_metrics.write_output_format_file(
                    output_file, output_dict)

        if params['mode'] == 'dev':
            test_data_in, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels(data_gen_test, test_data_out,
                                          params['quick_test'])
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
            test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])
            # rescaling the reference elevation from [-180 180] to [-def_elevation def_elevation] for scoring purpose
            test_doa_gt[:, nb_classes:] /= (180. / def_elevation)

            test_sed_loss = evaluation_metrics.compute_sed_scores(
                test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            test_doa_loss = evaluation_metrics.compute_doa_scores_regr(
                test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
            test_metric_loss = evaluation_metrics.compute_seld_metric(
                test_sed_loss, test_doa_loss)

            avg_scores_test.append([
                test_sed_loss[0], test_sed_loss[1], test_doa_loss[0],
                test_doa_loss[1], test_metric_loss
            ])
            print('Results on test split:')
            print('\tSELD_score: {},  '.format(test_metric_loss))
            print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(
                test_doa_loss[0], test_doa_loss[1]))
            print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
                test_sed_loss[0], test_sed_loss[1]))

    print('\n\nValidation split scores per fold:\n')
    for cnt in range(len(val_splits)):
        print(
            '\tSplit {} - SED ER: {} F1: {}; DOA error: {} frame recall: {}; SELD score: {}'
            .format(cnt, avg_scores_val[cnt][0], avg_scores_val[cnt][1],
                    avg_scores_val[cnt][2], avg_scores_val[cnt][3],
                    avg_scores_val[cnt][4]))

    if params['mode'] == 'dev':
        print('\n\nTesting split scores per fold:\n')
        for cnt in range(len(val_splits)):
            print(
                '\tSplit {} - SED ER: {} F1: {}; DOA error: {} frame recall: {}; SELD score: {}'
                .format(cnt, avg_scores_test[cnt][0], avg_scores_test[cnt][1],
                        avg_scores_test[cnt][2], avg_scores_test[cnt][3],
                        avg_scores_test[cnt][4]))
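
The checkpoint-and-patience pattern above recurs in every example below: save only on improvement, reset a counter, and stop once params['patience'] epochs pass with no better score. A minimal self-contained sketch of that loop, with a hypothetical train_one_epoch callable standing in for the fit/predict/score cycle:

import numpy as np

def train_with_early_stopping(nb_epochs, patience, train_one_epoch, save_model):
    """Run training epochs, keep the best score, and stop early on stagnation."""
    best_score, best_epoch, patience_cnt = np.inf, -1, 0
    for epoch in range(nb_epochs):
        score = train_one_epoch(epoch)  # lower is better, e.g. a SELD score
        patience_cnt += 1
        if score < best_score:
            best_score, best_epoch = score, epoch
            save_model()          # checkpoint only on improvement
            patience_cnt = 0      # reset the no-improvement counter
        if patience_cnt > patience:
            break
    return best_score, best_epoch

Checkpointing only on improvement means the saved model file always holds the best epoch, which is why each example reloads '{}_model.h5' before testing.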
Example #2
def main(argv):
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        # test_splits = [1, 2, 3, 4]
        # val_splits = [2, 3, 4, 1]
        # train_splits = [[3, 4], [4, 1], [1, 2], [2, 3]]
        # TODO for debug only
        test_splits = [1]
        val_splits = [1]
        train_splits = [[1, 1]]

        # SUGGESTION: Considering the long training time, major tuning of the method can be done on the first split.
        # Once you finalize the method, you can evaluate its performance on the complete cross-validation splits
        # test_splits = [1]
        # val_splits = [2]
        # train_splits = [[3, 4]]

    elif params['mode'] == 'eval':
        test_splits = [0]
        val_splits = [1]
        train_splits = [[2, 3, 4]]

    # ------------------  Calculate metric scores for unseen test split ---------------------------------
    # This fragment evaluates a single fold, so take the first test split
    split = test_splits[0]
    avg_scores_test = []

    print('Loading testing dataset:')
    data_gen_test = cls_data_generator.DataGenerator(
        dataset=params['dataset'],
        split=split,
        batch_size=params['batch_size'],
        seq_len=params['sequence_length'],
        feat_label_dir=params['feat_label_dir'],
        shuffle=False,
        per_file=params['dcase_output'],
        is_eval=(params['mode'] == 'eval'))
    nb_classes = data_gen_test.get_nb_classes()
    def_elevation = data_gen_test.get_default_elevation()

    print('\nLoading the best model and predicting results on the testing split')
    # unique_name mirrors the naming scheme used when the model was trained
    job_id = 1 if len(argv) < 3 else argv[-1]
    unique_name = os.path.join(
        params['model_dir'],
        '{}_{}_{}_{}_split{}'.format(task_id, job_id, params['dataset'],
                                     params['mode'], split))
    model = load_model('{}_model.h5'.format(unique_name))
    pred_test = model.predict_generator(
        generator=data_gen_test.generate(),
        steps=2 if params['quick_test'] else
        data_gen_test.get_total_batches_in_data(),
        verbose=2)

    test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
    test_doa_pred = evaluation_metrics.reshape_3Dto2D(pred_test[1])

    # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
    test_doa_pred[:, nb_classes:] /= (180. / def_elevation)

    if params['dcase_output']:
        # Dump results in DCASE output format for calculating final scores
        dcase_dump_folder = os.path.join(
            params['dcase_dir'], '{}_{}_{}'.format(task_id, params['dataset'],
                                                   params['mode']))
        cls_feature_class.create_folder(dcase_dump_folder)
        print(
            'Dumping recording-wise results in: {}'.format(dcase_dump_folder))

        test_filelist = data_gen_test.get_filelist()
        # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
        max_frames_with_content = data_gen_test.get_nb_frames()

        # Number of frames in one batch (batch_size* sequence_length) consists of all the 3000 frames above with
        # zero padding in the remaining frames
        frames_per_file = data_gen_test.get_frame_per_file()

        for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
            output_file = os.path.join(
                dcase_dump_folder,
                test_filelist[file_cnt].replace('.npy', '.csv'))
            dc = file_cnt * frames_per_file
            output_dict = evaluation_metrics.regression_label_format_to_output_format(
                data_gen_test,
                test_sed_pred[dc:dc + max_frames_with_content, :],
                test_doa_pred[dc:dc + max_frames_with_content, :] * 180 /
                np.pi)
            evaluation_metrics.write_output_format_file(
                output_file, output_dict)

    if params['mode'] == 'dev':
        _, _, test_data_out = data_gen_test.get_data_sizes()
        test_gt = collect_test_labels(data_gen_test, test_data_out,
                                      params['quick_test'])
        test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
        test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])
        # rescaling the reference elevation from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        test_doa_gt[:, nb_classes:] /= (180. / def_elevation)

        test_sed_loss = evaluation_metrics.compute_sed_scores(
            test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
        test_doa_loss = evaluation_metrics.compute_doa_scores_regr(
            test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
        test_metric_loss = evaluation_metrics.compute_seld_metric(
            test_sed_loss, test_doa_loss)

        avg_scores_test.append([
            test_sed_loss[0], test_sed_loss[1], test_doa_loss[0],
            test_doa_loss[1], test_metric_loss
        ])
        print('Results on test split:')
        print('\tSELD_score: {},  '.format(test_metric_loss))
        print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(
            test_doa_loss[0], test_doa_loss[1]))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
            test_sed_loss[0], test_sed_loss[1]))
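
Both examples dump recording-wise CSVs through evaluation_metrics.write_output_format_file, which is not shown. A sketch of what it is assumed to write — one row per active event in the DCASE '<frame>,<class>,<azimuth>,<elevation>' convention; treat the exact column set as an assumption:

def write_output_format_file(output_file, output_dict):
    # output_dict maps a frame index to a list of [class, azimuth, elevation]
    with open(output_file, 'w') as f:
        for frame_ind in output_dict:
            for value in output_dict[frame_ind]:
                f.write('{},{},{},{}\n'.format(int(frame_ind), int(value[0]),
                                               float(value[1]), float(value[2])))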
Example #3
def main(args):
    '''
    Main wrapper for training sound event localization and detection network.
    
    :param args: parsed command-line arguments carrying the dataset, mode,
        name (used as the task_id), and job_id.
    '''
    # use parameter set defined by user
    dataset, mode, task_id, job_id = args.dataset, args.mode, args.name, args.job_id
    task = 'sed'
    feat_type = 'mel'
    nb_ch = 4
    doa_type = None
    params, model_params = parameter.get_params(dataset=dataset,
                                                mode=mode,
                                                task_id=task_id,
                                                feat_type=feat_type,
                                                doa=doa_type)

    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        test_splits = [1, 2, 3, 4]
        val_splits = [2, 3, 4, 1]
        train_splits = [[3, 4], [4, 1], [1, 2], [2, 3]]

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print('\nThis is split {}'.format(split_cnt))

        # Unique name for the run
        model_dir_prefix = os.path.join(
            params['model_dir'], task) if task == 'sed' else os.path.join(
                params['model_dir'], 'doa_reg')
        cls_feature_class.create_folder(model_dir_prefix)
        #model_id = int(job_id) + split_cnt
        unique_name = '{}{}_{}_{}_sed_dev_split{}'.format(
            task_id, str(job_id), params['dataset'], params['feat_type'],
            split_cnt + 1)
        unique_name = os.path.join(model_dir_prefix, unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print('\tmodel unique name: {}\n'.format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=train_splits[split_cnt],
            batch_size=params['batch_size'],
            seq_len=params['seq_length'],
            feat_label_dir=params['feat_label_dir'],
            feat_type=feat_type,
            doa=doa_type)

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=val_splits[split_cnt],
            batch_size=params['batch_size'],
            seq_len=3000,
            per_file=True,
            feat_label_dir=params['feat_label_dir'],
            shuffle=False,
            feat_type=feat_type,
            doa=doa_type)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(
            data_in, data_out))

        gt = collect_test_labels_3000(data_gen_val)
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt)  # [3000*100, 11]
        nb_classes = data_gen_train.get_nb_classes()
        def_elevation = data_gen_train.get_default_elevation()
        if task_id == 'crnn':
            model = CUDA(CRNN_SED(data_in, data_out[0]))
        elif task_id == 'mcrnn':
            model = CUDA(MCRNN_SED(data_in, data_out[0]))
        else:
            raise ValueError('Unknown task_id: {}'.format(task_id))
        model.apply(kaiming_init)

        total_num = sum(param.numel() for param in model.parameters())
        print('==========================================')
        print('Total parameter number for {}: {}'.format(
            model_params['method'], total_num))
        print('==========================================')

        # Pytorch optimizer
        optimizer = optim.Adam(params=model.parameters(), lr=0.001)
        feat_torch = CUDA(
            Variable(
                torch.FloatTensor(params['batch_size'], nb_ch,
                                  params['seq_length'], params['feat_dim'])))
        label_sed = CUDA(
            Variable(
                torch.FloatTensor(params['batch_size'], params['seq_length'],
                                  11)))
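        # The two Variables above are preallocated GPU buffers that each batch
        # below reuses via resize_()/copy_(), avoiding a fresh allocation per
        # iteration. (Since PyTorch 0.4, Variable is a no-op wrapper and plain
        # tensors would behave the same way.)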
        best_seld_metric = 99999
        best_sed_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        seld_metric = np.zeros(params['nb_epochs'])
        tr_loss = np.zeros(params['nb_epochs'])
        sed_val_loss = np.zeros(params['nb_epochs'])
        sed_metric = np.zeros((params['nb_epochs'], 2))
        nb_epoch = params['nb_epochs']

        # start training
        pbar_epoch = tqdm(total=nb_epoch, desc='[Epoch]')
        for epoch_cnt in range(nb_epoch):
            # train stage
            model.train()
            iter_cnt = 0
            for feat, label in data_gen_train.generate():
                feat_torch.resize_(params['batch_size'], nb_ch,
                                   params['seq_length'], params['feat_dim'])
                feat_torch.data.copy_(torch.from_numpy(feat))

                label_sed.resize_(params['batch_size'], params['seq_length'],
                                  11)
                label_sed.data.copy_(torch.from_numpy(label[0]))
                sed = model(feat_torch)

                sed_loss = bce_loss(sed, label_sed)
                doa_loss = 0.0

                total_loss = sed_loss + doa_loss

                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                if iter_cnt % params['print_iter'] == 0:
                    pbar_epoch.write(
                        'Iteration: {:3d}, sed_loss: {:.4f}, doa_loss: {:.4f}, total_loss: {:.4f}'
                        .format(iter_cnt, sed_loss, doa_loss, total_loss))

                #pbar_iteration.update(1)
                iter_cnt += 1
                if iter_cnt >= data_gen_train.get_total_batches_in_data():
                    break
            iter_cnt = 0
            sed_validation_loss = 0
            entire_pred_sed = np.zeros(
                (data_gen_val._batch_size *
                 data_gen_val.get_total_batches_in_data(), 3000, 11))
            model.eval()
            with torch.no_grad():
                for feat, label in data_gen_val.generate():
                    batch_size = feat.shape[0]

                    feat_torch.resize_(batch_size, nb_ch, 3000,
                                       params['feat_dim'])
                    feat_torch.data.copy_(torch.from_numpy(feat))
                    label_sed.resize_(batch_size, 3000, 11)
                    label_sed.copy_(torch.from_numpy(label[0]))

                    sed = model(feat_torch)
                    sed_loss = bce_loss(sed, label_sed)
                    sed_validation_loss += sed_loss

                    # concat all predictions
                    entire_pred_sed[
                        iter_cnt * batch_size:(iter_cnt + 1) *
                        batch_size, :] = sed.detach().cpu().numpy()
                    iter_cnt += 1
                    if iter_cnt >= data_gen_val.get_total_batches_in_data():
                        break
            sed_validation_loss /= data_gen_val.get_total_batches_in_data()

            tr_loss[epoch_cnt] = total_loss.item()
            sed_val_loss[epoch_cnt] = sed_validation_loss.item()

            # Calculate the metrics
            # binarize frame-wise predictions with the detection threshold
            sed_pred = evaluation_metrics.reshape_3Dto2D(
                entire_pred_sed) > params['threshold']
            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_val.nb_frames_1s())

            patience_cnt += 1
            if sed_metric[epoch_cnt, 0] < best_sed_metric:
                best_sed_metric = sed_metric[epoch_cnt, 0]
                best_epoch = epoch_cnt
                save_model(model, model_name)
                patience_cnt = 0

            pbar_epoch.update(1)

            pbar_epoch.write(
                'epoch_cnt: %d, sed_tr_loss: %.4f, sed_val_loss: %.4f, ER_overall: %.2f, F1_overall: %.2f, best_sed_ER: %.4f, best_epoch : %d\n'
                % (epoch_cnt, tr_loss[epoch_cnt], sed_val_loss[epoch_cnt],
                   sed_metric[epoch_cnt, 0], sed_metric[epoch_cnt, 1],
                   best_sed_metric, best_epoch))

            if patience_cnt >= params['patience']:
                break

        pbar_epoch.close()

        avg_scores_val.append(
            [sed_metric[best_epoch, 0], sed_metric[best_epoch, 1]]
        )  #, doa_metric[best_epoch, 0], doa_metric[best_epoch, 1], best_seld_metric])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
            sed_metric[best_epoch, 0], sed_metric[best_epoch, 1]))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print('Loading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            dataset=params['dataset'],
            split=split,
            batch_size=params['batch_size'],
            seq_len=3000,
            feat_label_dir=params['feat_label_dir'],
            shuffle=False,
            per_file=True,
            is_eval=(params['mode'] == 'eval'),
            feat_type=feat_type,
            doa=doa_type)
        test_batch_size = data_gen_test._batch_size

        print(
            '\nLoading the best model and predicting results on the testing split'
        )
        model = load_model(model, '{}_model.h5'.format(unique_name))
        model.eval()

        # test stage
        total_test_batches = data_gen_test.get_total_batches_in_data()
        pbar_test = tqdm(total=total_test_batches, desc='[Testing]')
        iter_cnt = 0
        entire_test_sed = np.zeros(
            (test_batch_size * total_test_batches, 3000, 11))
        with torch.no_grad():
            if params['mode'] == 'dev':
                for feat, label in data_gen_test.generate():
                    batch_size = feat.shape[0]

                    feat_torch.data.resize_(batch_size, nb_ch, 3000,
                                            params['feat_dim'])
                    feat_torch.data.copy_(torch.from_numpy(feat))

                    sed = model(feat_torch)
                    # concat all predictions
                    entire_test_sed[
                        iter_cnt * test_batch_size:(iter_cnt + 1) *
                        test_batch_size, :] = sed.detach().cpu().numpy()
                    pbar_test.update(1)
                    iter_cnt += 1
                    if iter_cnt >= data_gen_test.get_total_batches_in_data():
                        break
        print('The test batch size is {}'.format(batch_size))
        pbar_test.close()

        test_sed_pred = evaluation_metrics.reshape_3Dto2D(
            entire_test_sed) > params['threshold']
        if params['mode'] == 'dev':
            _, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels_3000(data_gen_test)
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt)
            test_sed_loss = evaluation_metrics.compute_sed_scores(
                test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            avg_scores_test.append([test_sed_loss[0], test_sed_loss[1]])
            print('Results on test split:')
            print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(
                test_sed_loss[0], test_sed_loss[1]))

    print('\n\nValidation split scores per fold:\n')
    for cnt in range(len(val_splits)):
        print('\t Split {} - SED ER: {} F1: {}'.format(val_splits[cnt],
                                                       avg_scores_val[cnt][0],
                                                       avg_scores_val[cnt][1]))

    if params['mode'] == 'dev':
        print('\n\nTesting split scores per fold:\n')
        for cnt in range(len(val_splits)):
            print('\t Split {} - SED ER: {} F1: {}'.format(
                test_splits[cnt], avg_scores_test[cnt][0],
                avg_scores_test[cnt][1]))
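
Example #3 depends on save_model and load_model helpers that are not part of the snippet. A minimal sketch of what they are assumed to do, following the idiomatic PyTorch state_dict pattern (the .h5 suffix here is just a file name, not HDF5 semantics):

import torch

def save_model(model, path):
    # persist only the learned parameters, not the module object
    torch.save(model.state_dict(), path)

def load_model(model, path):
    # restore parameters into an already-constructed model instance
    model.load_state_dict(torch.load(path))
    return model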
Example #4
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs.
        first input: task_id - (optional) To choose the system configuration in parameters.py.
                                (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely identified with this.
                              (default) 1

    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('The code expects two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print(
            '\t\t<task-id> is used to choose the user-defined parameter set from parameter.py'
        )
        print(
            '\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
            'You can use any number or string for this.')
        print('Using default inputs for now')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    feat_cls = cls_feature_class.FeatureClass(params)
    train_splits, val_splits, test_splits = None, None, None

    if params['mode'] == 'dev':
        test_splits = [6]
        val_splits = [5]
        train_splits = [[1, 2, 3, 4]]

    elif params['mode'] == 'eval':
        test_splits = [[7, 8]]
        val_splits = [[6]]
        train_splits = [[1, 2, 3, 4, 5]]

    for split_cnt, split in enumerate(test_splits):
        print(
            '\n\n---------------------------------------------------------------------------------------------------'
        )
        print(
            '------------------------------------      SPLIT {}   -----------------------------------------------'
            .format(split))
        print(
            '---------------------------------------------------------------------------------------------------'
        )

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id,
                                                   params['dataset'],
                                                   params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            params=params, split=train_splits[split_cnt])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            params=params,
            split=val_splits[split_cnt],
            shuffle=False,
            per_file=True,
            is_eval=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(
            data_in, data_out))

        nb_classes = data_gen_train.get_nb_classes()
        print(
            'MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size: {}, t_pool_size: {}\n\trnn_size: {}, fnn_size: {}\n\tdoa_objective: {}\n'
            .format(params['dropout_rate'], params['nb_cnn2d_filt'],
                    params['f_pool_size'], params['t_pool_size'],
                    params['rnn_size'], params['fnn_size'],
                    params['doa_objective']))

        print('Using loss weights : {}'.format(params['loss_weights']))
        model = keras_model.get_model(data_in=data_in,
                                      data_out=data_out,
                                      dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                      f_pool_size=params['f_pool_size'],
                                      t_pool_size=params['t_pool_size'],
                                      rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'],
                                      doa_objective=params['doa_objective'],
                                      is_accdoa=params['is_accdoa'])

        # Dump results in DCASE output format for calculating final scores
        dcase_output_val_folder = os.path.join(
            params['dcase_output_dir'],
            '{}_{}_{}_val'.format(task_id, params['dataset'], params['mode']))
        cls_feature_class.delete_and_create_folder(dcase_output_val_folder)
        print('Dumping recording-wise val results in: {}'.format(
            dcase_output_val_folder))

        # Initialize evaluation metric class
        score_obj = ComputeSELDResults(params)

        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        tr_loss = np.zeros(nb_epoch)
        seld_metric = np.zeros((nb_epoch, 5))

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else
                data_gen_train.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2,
            )
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                verbose=2)

            if params['is_accdoa']:
                sed_pred, doa_pred = get_accdoa_labels(pred, nb_classes)
                sed_pred = reshape_3Dto2D(sed_pred)
                doa_pred = reshape_3Dto2D(doa_pred)
            else:
                sed_pred = reshape_3Dto2D(pred[0]) > 0.5
                doa_pred = reshape_3Dto2D(
                    pred[1] if params['doa_objective'] == 'mse'
                    else pred[1][:, :, nb_classes:])
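                # With the masked-MSE objective, the DOA branch is assumed to
                # prepend nb_classes SED-mask columns, hence the slice above.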

            # Calculate the DCASE 2021 metrics - Location-aware detection and Class-aware localization scores
            dump_DCASE2021_results(data_gen_val, feat_cls,
                                   dcase_output_val_folder, sed_pred, doa_pred)
            seld_metric[epoch_cnt, :] = score_obj.get_SELD_Results(
                dcase_output_val_folder)

            patience_cnt += 1
            if seld_metric[epoch_cnt, -1] < best_seld_metric:
                best_seld_metric = seld_metric[epoch_cnt, -1]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: {}, time: {:0.2f}s, tr_loss: {:0.2f}, '
                '\n\t\t DCASE2021 SCORES: ER: {:0.2f}, F: {:0.1f}, LE: {:0.1f}, LR:{:0.1f}, seld_score (early stopping score): {:0.2f}, '
                'best_seld_score: {:0.2f}, best_epoch : {}\n'.format(
                    epoch_cnt,
                    time.time() - start, tr_loss[epoch_cnt],
                    seld_metric[epoch_cnt, 0], seld_metric[epoch_cnt, 1] * 100,
                    seld_metric[epoch_cnt, 2], seld_metric[epoch_cnt, 3] * 100,
                    seld_metric[epoch_cnt, -1], best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break

        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score (early stopping score) : {}'.format(
            best_seld_metric))

        print('\n\tDCASE2021 scores')
        print(
            '\tClass-aware localization scores: Localization Error: {:0.1f}, Localization Recall: {:0.1f}'
            .format(seld_metric[best_epoch, 2],
                    seld_metric[best_epoch, 3] * 100))
        print(
            '\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'
            .format(seld_metric[best_epoch, 0],
                    seld_metric[best_epoch, 1] * 100))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print(
            '\nLoading the best model and predicting results on the testing split'
        )
        print('\tLoading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            params=params,
            split=split,
            shuffle=False,
            per_file=True,
            is_eval=(params['mode'] == 'eval'))

        model = keras_model.load_seld_model('{}_model.h5'.format(unique_name),
                                            params['doa_objective'])
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            verbose=2)
        if params['is_accdoa']:
            test_sed_pred, test_doa_pred = get_accdoa_labels(
                pred_test, nb_classes)
            test_sed_pred = reshape_3Dto2D(test_sed_pred)
            test_doa_pred = reshape_3Dto2D(test_doa_pred)
        else:
            test_sed_pred = reshape_3Dto2D(pred_test[0]) > 0.5
            test_doa_pred = reshape_3Dto2D(
                pred_test[1] if params['doa_objective'] == 'mse'
                else pred_test[1][:, :, nb_classes:])

        # Dump results in DCASE output format for calculating final scores
        dcase_output_test_folder = os.path.join(
            params['dcase_output_dir'],
            '{}_{}_{}_test'.format(task_id, params['dataset'], params['mode']))
        cls_feature_class.delete_and_create_folder(dcase_output_test_folder)
        print('Dumping recording-wise test results in: {}'.format(
            dcase_output_test_folder))
        dump_DCASE2021_results(data_gen_test, feat_cls,
                               dcase_output_test_folder, test_sed_pred,
                               test_doa_pred)

        if params['mode'] == 'dev':
            # Calculate DCASE2021 scores
            test_seld_metric = score_obj.get_SELD_Results(
                dcase_output_test_folder)

            print('Results on test split:')
            print('\tDCASE2021 Scores')
            print(
                '\tClass-aware localization scores: Localization Error: {:0.1f}, Localization Recall: {:0.1f}'
                .format(test_seld_metric[2], test_seld_metric[3] * 100))
            print(
                '\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'
                .format(test_seld_metric[0], test_seld_metric[1] * 100))
            print('\tSELD (early stopping metric): {:0.2f}'.format(
                test_seld_metric[-1]))
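
Example #4 decodes the network output with get_accdoa_labels, which is not shown. A sketch consistent with the DCASE2021 ACCDOA baseline, where the norm of each class's Cartesian activity vector doubles as its detection score:

import numpy as np

def get_accdoa_labels(accdoa_in, nb_classes):
    # accdoa_in: (batch, time, 3 * nb_classes) per-class (x, y, z) vectors
    x = accdoa_in[:, :, :nb_classes]
    y = accdoa_in[:, :, nb_classes:2 * nb_classes]
    z = accdoa_in[:, :, 2 * nb_classes:]
    # a class counts as active when its activity vector is long enough
    sed = np.sqrt(x**2 + y**2 + z**2) > 0.5
    return sed, accdoa_in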
Example #5
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs.
        first input: task_id - (optional) To choose the system configuration in parameters.py.
                                (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely identified with this.
                              (default) 1

    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('The code expects two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print(
            '\t\t<task-id> is used to choose the user-defined parameter set from parameter.py'
        )
        print(
            '\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
            'You can use any number or string for this.')
        print('Using default inputs for now')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    feat_cls = cls_feature_class.FeatureClass(params)
    train_splits, val_splits, test_splits = None, None, None

    if params['mode'] == 'dev':
        test_splits = [1]
        val_splits = [2]
        train_splits = [[3, 4, 5, 6]]

    elif params['mode'] == 'eval':
        test_splits = [[7, 8]]
        val_splits = [[1]]
        train_splits = [[2, 3, 4, 5, 6]]

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print(
            '\n\n---------------------------------------------------------------------------------------------------'
        )
        print(
            '------------------------------------      SPLIT {}   -----------------------------------------------'
            .format(split))
        print(
            '---------------------------------------------------------------------------------------------------'
        )

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id,
                                                   params['dataset'],
                                                   params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            params=params, split=train_splits[split_cnt])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            params=params, split=val_splits[split_cnt], shuffle=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(
            data_in, data_out))

        nb_classes = data_gen_train.get_nb_classes()
        gt = collect_test_labels(data_gen_val, data_out, nb_classes,
                                 params['quick_test'])
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
        doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

        print(
            'MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size: {}, t_pool_size: {}\n\trnn_size: {}, fnn_size: {}\n\tdoa_objective: {}\n'
            .format(params['dropout_rate'], params['nb_cnn2d_filt'],
                    params['f_pool_size'], params['t_pool_size'],
                    params['rnn_size'], params['fnn_size'],
                    params['doa_objective']))

        print('Using loss weights : {}'.format(params['loss_weights']))
        model = keras_model.get_model(data_in=data_in,
                                      data_out=data_out,
                                      dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                      f_pool_size=params['f_pool_size'],
                                      t_pool_size=params['t_pool_size'],
                                      rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'],
                                      doa_objective=params['doa_objective'])
        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        seld_metric = np.zeros(nb_epoch)
        new_seld_metric = np.zeros(nb_epoch)
        tr_loss = np.zeros(nb_epoch)
        doa_metric = np.zeros((nb_epoch, 6))
        sed_metric = np.zeros((nb_epoch, 2))
        new_metric = np.zeros((nb_epoch, 4))

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else
                data_gen_train.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2,
            )
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                verbose=2)

            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(
                pred[1] if params['doa_objective'] == 'mse'
                else pred[1][:, :, nb_classes:])

            # Calculate the DCASE 2019 metrics - Detection-only and Localization-only scores
            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_val.nb_frames_1s())
            doa_metric[epoch_cnt, :] = evaluation_metrics.compute_doa_scores_regr_xyz(
                doa_pred, doa_gt, sed_pred, sed_gt)
            seld_metric[epoch_cnt] = evaluation_metrics.early_stopping_metric(
                sed_metric[epoch_cnt, :], doa_metric[epoch_cnt, :])

            # Calculate the DCASE 2020 metrics - Location-aware detection and Class-aware localization scores
            cls_new_metric = SELD_evaluation_metrics.SELDMetrics(
                nb_classes=data_gen_val.get_nb_classes(),
                doa_threshold=params['lad_doa_thresh'])
            pred_dict = feat_cls.regression_label_format_to_output_format(
                sed_pred, doa_pred)
            gt_dict = feat_cls.regression_label_format_to_output_format(
                sed_gt, doa_gt)

            pred_blocks_dict = feat_cls.segment_labels(pred_dict,
                                                       sed_pred.shape[0])
            gt_blocks_dict = feat_cls.segment_labels(gt_dict, sed_gt.shape[0])

            cls_new_metric.update_seld_scores_xyz(pred_blocks_dict,
                                                  gt_blocks_dict)
            new_metric[epoch_cnt, :] = cls_new_metric.compute_seld_scores()
            new_seld_metric[epoch_cnt] = evaluation_metrics.early_stopping_metric(
                new_metric[epoch_cnt, :2], new_metric[epoch_cnt, 2:])

            # Visualize the metrics with respect to epochs
            plot_functions(unique_name, tr_loss, sed_metric, doa_metric,
                           seld_metric, new_metric, new_seld_metric)

            patience_cnt += 1
            if new_seld_metric[epoch_cnt] < best_seld_metric:
                best_seld_metric = new_seld_metric[epoch_cnt]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: {}, time: {:0.2f}s, tr_loss: {:0.2f}, '
                '\n\t\t DCASE2019 SCORES: ER: {:0.2f}, F: {:0.1f}, DE: {:0.1f}, FR:{:0.1f}, seld_score: {:0.2f}, '
                '\n\t\t DCASE2020 SCORES: ER: {:0.2f}, F: {:0.1f}, DE: {:0.1f}, DE_F:{:0.1f}, seld_score (early stopping score): {:0.2f}, '
                'best_seld_score: {:0.2f}, best_epoch : {}\n'.format(
                    epoch_cnt,
                    time.time() - start, tr_loss[epoch_cnt],
                    sed_metric[epoch_cnt, 0], sed_metric[epoch_cnt, 1] * 100,
                    doa_metric[epoch_cnt, 0], doa_metric[epoch_cnt, 1] * 100,
                    seld_metric[epoch_cnt], new_metric[epoch_cnt, 0],
                    new_metric[epoch_cnt, 1] * 100, new_metric[epoch_cnt, 2],
                    new_metric[epoch_cnt, 3] * 100, new_seld_metric[epoch_cnt],
                    best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break

        avg_scores_val.append([
            new_metric[best_epoch, 0], new_metric[best_epoch, 1],
            new_metric[best_epoch, 2], new_metric[best_epoch, 3],
            best_seld_metric
        ])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score (early stopping score) : {}'.format(
            best_seld_metric))

        print('\n\tDCASE2020 scores')
        print(
            '\tClass-aware localization scores: DOA_error: {:0.1f}, F-score: {:0.1f}'
            .format(new_metric[best_epoch, 2],
                    new_metric[best_epoch, 3] * 100))
        print(
            '\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'
            .format(new_metric[best_epoch, 0],
                    new_metric[best_epoch, 1] * 100))

        print('\n\tDCASE2019 scores')
        print(
            '\tLocalization-only scores: DOA_error: {:0.1f}, Frame recall: {:0.1f}'
            .format(doa_metric[best_epoch, 0],
                    doa_metric[best_epoch, 1] * 100))
        print(
            '\tDetection-only scores: Error rate: {:0.2f}, F-score: {:0.1f}\n'.
            format(sed_metric[best_epoch, 0], sed_metric[best_epoch, 1] * 100))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print(
            '\nLoading the best model and predicting results on the testing split'
        )
        print('\tLoading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            params=params,
            split=split,
            shuffle=False,
            per_file=params['dcase_output'],
            is_eval=(params['mode'] == 'eval'))

        model = keras_model.load_seld_model('{}_model.h5'.format(unique_name),
                                            params['doa_objective'])
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            verbose=2)

        test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
        test_doa_pred = evaluation_metrics.reshape_3Dto2D(
            pred_test[1] if params['doa_objective'] == 'mse'
            else pred_test[1][:, :, nb_classes:])

        if params['dcase_output']:
            # Dump results in DCASE output format for calculating final scores
            dcase_dump_folder = os.path.join(
                params['dcase_dir'],
                '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
            cls_feature_class.create_folder(dcase_dump_folder)
            print('Dumping recording-wise results in: {}'.format(
                dcase_dump_folder))

            test_filelist = data_gen_test.get_filelist()
            # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
            max_frames_with_content = data_gen_test.get_nb_frames()

            # Each file occupies frames_per_file = batch_size * sequence_length
            # frames: the (up to) 3000 content frames above, zero-padded in the
            # remaining frames
            frames_per_file = data_gen_test.get_frame_per_file()

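            # slice the flat prediction matrices back into per-recording chunks,
            # dropping the zero-padded tail, and write one CSV per recording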
            for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
                output_file = os.path.join(
                    dcase_dump_folder,
                    test_filelist[file_cnt].replace('.npy', '.csv'))
                dc = file_cnt * frames_per_file
                output_dict = feat_cls.regression_label_format_to_output_format(
                    test_sed_pred[dc:dc + max_frames_with_content, :],
                    test_doa_pred[dc:dc + max_frames_with_content, :])
                data_gen_test.write_output_format_file(output_file,
                                                       output_dict)

        if params['mode'] == 'dev':
            test_data_in, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels(data_gen_test, test_data_out,
                                          nb_classes, params['quick_test'])
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
            test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])

            # Calculate DCASE2019 scores
            test_sed_loss = evaluation_metrics.compute_sed_scores(
                test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            test_doa_loss = evaluation_metrics.compute_doa_scores_regr_xyz(
                test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
            test_metric_loss = evaluation_metrics.early_stopping_metric(
                test_sed_loss, test_doa_loss)

            # Calculate DCASE2020 scores
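            # joint metrics are computed over one-second segments; a detection
            # counts as correct only if its DOA is within the 20-degree threshold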
            cls_new_metric = SELD_evaluation_metrics.SELDMetrics(
                nb_classes=data_gen_test.get_nb_classes(), doa_threshold=20)
            test_pred_dict = feat_cls.regression_label_format_to_output_format(
                test_sed_pred, test_doa_pred)
            test_gt_dict = feat_cls.regression_label_format_to_output_format(
                test_sed_gt, test_doa_gt)

            test_pred_blocks_dict = feat_cls.segment_labels(
                test_pred_dict, test_sed_pred.shape[0])
            test_gt_blocks_dict = feat_cls.segment_labels(
                test_gt_dict, test_sed_gt.shape[0])

            cls_new_metric.update_seld_scores_xyz(test_pred_blocks_dict,
                                                  test_gt_blocks_dict)
            test_new_metric = cls_new_metric.compute_seld_scores()
            test_new_seld_metric = evaluation_metrics.early_stopping_metric(
                test_new_metric[:2], test_new_metric[2:])

            avg_scores_test.append([
                test_new_metric[0], test_new_metric[1], test_new_metric[2],
                test_new_metric[3], test_new_seld_metric
            ])
            print('Results on test split:')

            print('\tDCASE2020 Scores')
            print(
                '\tClass-aware localization scores: DOA Error: {:0.1f}, F-score: {:0.1f}'
                .format(test_new_metric[2], test_new_metric[3] * 100))
            print(
                '\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'
                .format(test_new_metric[0], test_new_metric[1] * 100))
            print('\tSELD (early stopping metric): {:0.2f}'.format(
                test_new_seld_metric))

            print('\n\tDCASE2019 Scores')
            print(
                '\tLocalization-only scores: DOA Error: {:0.1f}, Frame recall: {:0.1f}'
                .format(test_doa_loss[0], test_doa_loss[1] * 100))
            print(
                '\tDetection-only scores: Error rate: {:0.2f}, F-score: {:0.1f}'
                .format(test_sed_loss[0], test_sed_loss[1] * 100))
Example #6
0
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.

    :param argv: expects two optional inputs.
        first input: task_id - (optional) To choose the system configuration in parameters.py.
                                (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely named with this.
                              (default) 1

    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('The code expects two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print(
            '\t\t<task-id> is used to choose the user-defined parameter set from parameter.py'
        )
        print('Using default inputs for now')
        print(
            '\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
            'You can use any number or string for this.')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('\n\n')

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    torch.autograd.set_detect_anomaly(True)
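    # NOTE: anomaly detection helps trace NaN/Inf gradients but slows training;
    # consider disabling it for full-length runs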

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = doanet_parameters.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    # load Hungarian network for data association, and freeze all layers.
    hnet_model = HNetGRU(max_len=2).to(device)
    hnet_model.load_state_dict(
        torch.load("models/hnet_model.pt", map_location=torch.device('cpu')))
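    # the checkpoint is mapped to CPU first; load_state_dict then copies the
    # tensors into the parameters already moved to `device` above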
    for model_params in hnet_model.parameters():
        model_params.requires_grad = False
    print('---------------- Hungarian-net -------------------')
    print(hnet_model)

    # Training setup
    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        test_splits = [1]
        val_splits = [2]
        train_splits = [[3, 4, 5, 6]]

    for split_cnt, split in enumerate(test_splits):
        print(
            '\n\n---------------------------------------------------------------------------------------------------'
        )
        print(
            '------------------------------------      SPLIT {}   -----------------------------------------------'
            .format(split))
        print(
            '---------------------------------------------------------------------------------------------------'
        )

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id,
                                                   params['dataset'],
                                                   params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            params=params, split=train_splits[split_cnt])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            params=params, split=val_splits[split_cnt], shuffle=False)

        # Collect i/o data size and load model configuration
        data_in, data_out = data_gen_train.get_data_sizes()
        model = doanet_model.CRNN(data_in, data_out, params).to(device)
        #        model.load_state_dict(torch.load("models/23_5624972_mic_dev_split1_model.h5", map_location='cpu'))

        print('---------------- DOA-net -------------------')
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(
            data_in, data_out))
        print(
            'MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size: {}, t_pool_size: {}\n\trnn_size: {}, fnn_size: {}\n'
            .format(params['dropout_rate'], params['nb_cnn2d_filt'],
                    params['f_pool_size'], params['t_pool_size'],
                    params['rnn_size'], params['fnn_size']))
        print(model)

        # start training
        best_val_epoch = -1
        best_doa, best_mota, best_ids, best_recall, best_precision, best_fscore = 180, 0, 1000, 0, 0, 0
        patience_cnt = 0

        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        tr_loss_list = np.zeros(nb_epoch)
        val_loss_list = np.zeros(nb_epoch)
        hung_tr_loss_list = np.zeros(nb_epoch)
        hung_val_loss_list = np.zeros(nb_epoch)

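        # MSE supervises the DOA regression branch; BCE-with-logits supervises
        # the activity predictions (the *_act_loss terms logged below)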
        optimizer = optim.Adam(model.parameters(), lr=params['lr'])
        criterion = torch.nn.MSELoss()
        activity_loss = nn.BCEWithLogitsLoss()

        for epoch_cnt in range(nb_epoch):
            # ---------------------------------------------------------------------
            # TRAINING
            # ---------------------------------------------------------------------
            start_time = time.time()
            train_loss, train_dMOTP_loss, train_dMOTA_loss, train_act_loss = train_epoch(
                data_gen_train, optimizer, model, hnet_model, activity_loss,
                criterion, params, device)
            train_time = time.time() - start_time
            # ---------------------------------------------------------------------
            # VALIDATION
            # ---------------------------------------------------------------------
            start_time = time.time()
            val_metric = doa_metric()
            val_metric, val_loss, val_dMOTP_loss, val_dMOTA_loss, val_act_loss = test_epoch(
                data_gen_val, model, hnet_model, activity_loss, criterion,
                val_metric, params, device)

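            # get_results() returns localization error (deg), MOTA, ID switches,
            # and DOA recall/precision/F-score (LE/MOTA/IDS/LR/LP/LF below)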
            (val_hung_loss, val_mota, val_ids, val_recall_doa,
             val_precision_doa, val_fscore_doa) = val_metric.get_results()
            val_time = time.time() - start_time

            # Save model if the localization error improved; reset patience so
            # early stopping counts epochs since the last improvement
            if val_hung_loss <= best_doa:
                best_val_epoch, best_doa, best_mota, best_ids, best_recall, best_precision, best_fscore = epoch_cnt, val_hung_loss, val_mota, val_ids, val_recall_doa, val_precision_doa, val_fscore_doa
                torch.save(model.state_dict(), model_name)
                patience_cnt = 0

            # Print stats and plot scores
            print(
                'epoch: {}, time: {:0.2f}/{:0.2f}, '
                'train_loss: {:0.2f} {}, val_loss: {:0.2f} {}, '
                'LE/MOTA/IDS/LR/LP/LF: {:0.3f}/{}, '
                'best_val_epoch: {} {}'.format(
                    epoch_cnt, train_time, val_time,
                    train_loss, '({:0.2f},{:0.2f},{:0.2f})'.format(
                        train_dMOTP_loss, train_dMOTA_loss, train_act_loss)
                    if params['use_hnet'] else '',
                    val_loss, '({:0.2f},{:0.2f},{:0.2f})'.format(
                        val_dMOTP_loss, val_dMOTA_loss, val_act_loss)
                    if params['use_hnet'] else '', val_hung_loss,
                    '{:0.2f}/{:0.2f}/{:0.2f}/{:0.2f}/{:0.2f}'.format(
                        val_mota, val_ids, val_recall_doa, val_precision_doa,
                        val_fscore_doa), best_val_epoch,
                    '({:0.2f}/{:0.2f}/{:0.2f}/{:0.2f}/{:0.2f}/{:0.2f})'.format(
                        best_doa, best_mota, best_ids, best_recall,
                        best_precision, best_fscore)))

            tr_loss_list[epoch_cnt] = train_loss
            val_loss_list[epoch_cnt] = val_loss
            hung_val_loss_list[epoch_cnt] = val_hung_loss
            plot_functions(unique_name, tr_loss_list, val_loss_list,
                           hung_tr_loss_list, hung_val_loss_list)

            patience_cnt += 1
            if patience_cnt > params['patience']:
                break

        # ---------------------------------------------------------------------
        # Evaluate on unseen test data
        # ---------------------------------------------------------------------
        print('Load best model weights')
        model.load_state_dict(torch.load(model_name, map_location='cpu'))

        print('Loading unseen test dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            params=params, split=test_splits[split_cnt], shuffle=False)

        test_metric = doa_metric()
        test_metric, test_loss, test_dMOTP_loss, test_dMOTA_loss, test_act_loss = test_epoch(
            data_gen_test, model, hnet_model, activity_loss, criterion,
            test_metric, params, device)

        (test_hung_loss, test_mota, test_ids, test_recall_doa,
         test_precision_doa, test_fscore_doa) = test_metric.get_results()

        print('test_loss: {:0.2f} {}, LE/MOTA/IDS/LR/LP/LF: {:0.3f}/{}'.format(
            test_loss, '({:0.2f},{:0.2f},{:0.2f})'.format(
                test_dMOTP_loss, test_dMOTA_loss, test_act_loss)
            if params['use_hnet'] else '', test_hung_loss,
            '{:0.2f}/{:0.2f}/{:0.2f}/{:0.2f}/{:0.2f}'.format(
                test_mota, test_ids, test_recall_doa, test_precision_doa,
                test_fscore_doa)))