def get_SELD_Results(self, pred_files_path):
        """
        Score every predicted file found in a folder and return the pooled SELD metrics.

        :param pred_files_path: folder whose entries (as listed by os.listdir) are loaded as
            predicted output format files. Each entry name is also used as the key into
            self._ref_labels, so a reference must exist under the same file name.
        :return: tuple (ER, F, LE, LR, seld_scr) - the four values returned by
            SELDMetrics.compute_seld_scores(), followed by the early stopping metric
            computed from [ER, F] and [LE, LR].
        """
        predicted_names = os.listdir(pred_files_path)
        features = self._feat_cls

        # One accumulator is shared by all files, so the scores are pooled over the folder
        scorer = SELD_evaluation_metrics.SELDMetrics(
            nb_classes=features.get_nb_classes(),
            doa_threshold=self._doa_thresh)

        for name in predicted_names:
            # Read the prediction, optionally convert it from Cartesian to polar
            output = features.load_output_format_file(
                os.path.join(pred_files_path, name))
            if self._use_polar_format:
                output = features.convert_output_format_cartesian_to_polar(output)
            segmented = features.segment_labels(output, features.get_nb_frames())

            # Accumulate the scores of this file against its reference labels
            scorer.update_seld_scores(segmented, self._ref_labels[name])

        # Overall SED and DOA scores
        ER, F, LE, LR = scorer.compute_seld_scores()
        return ER, F, LE, LR, SELD_evaluation_metrics.early_stopping_metric(
            [ER, F], [LE, LR])
    def get_consolidated_SELD_results(self, pred_files_path, score_type_list=['all', 'room']):
        '''
            Print the SELD scores for every requested grouping of the predicted files.

            :param pred_files_path: folder containing the predicted output format files. Every
                entry listed by os.listdir() for this folder is grouped and scored.
            :param score_type_list: Supported
                'all' - all the predicted files
                'room' - for individual rooms
                The default list is only read here, never modified.
            :return: None. All scores are written to stdout.
        '''

        # collect predicted files info
        pred_files = os.listdir(pred_files_path)
        nb_pred_files = len(pred_files)

        # Calculate scores for different splits, overlapping sound events, and impulse responses (reverberant scenes)

        print('Number of predicted files: {}\nNumber of reference files: {}'.format(nb_pred_files, self._nb_ref_files))
        # The folder is taken from the pred_files_path argument. This method used to read the
        # name `pred_output_format_files`, which is not defined in this scope.
        print('\nCalculating {} scores for {}'.format(score_type_list, os.path.basename(pred_files_path)))

        for score_type in score_type_list:
            print('\n\n---------------------------------------------------------------------------------------------------')
            print('------------------------------------  {}   ---------------------------------------------'.format('Total score' if score_type=='all' else 'score per {}'.format(score_type)))
            print('---------------------------------------------------------------------------------------------------')

            # collect files corresponding to score_type
            split_cnt_dict = self.get_nb_files(pred_files, tag=score_type)

            # Calculate scores across files for a given score_type, visiting the groups in sorted order
            for split_key in sorted(split_cnt_dict):
                # A fresh metric object per group keeps the groups' scores independent
                seld_metrics = SELD_evaluation_metrics.SELDMetrics(nb_classes=self._feat_cls.get_nb_classes(), doa_threshold=self._doa_thresh)
                for pred_file in split_cnt_dict[split_key]:
                    # Load predicted output format file
                    pred_dict = self._feat_cls.load_output_format_file(os.path.join(pred_files_path, pred_file))
                    if self._use_polar_format:
                        pred_dict = self._feat_cls.convert_output_format_cartesian_to_polar(pred_dict)
                    pred_labels = self._feat_cls.segment_labels(pred_dict, self._feat_cls.get_nb_frames())

                    # Accumulate the scores against the reference stored under the same file name
                    seld_metrics.update_seld_scores(pred_labels, self._ref_labels[pred_file])

                # Overall SED and DOA scores
                ER, F, LE, LR = seld_metrics.compute_seld_scores()
                seld_scr = SELD_evaluation_metrics.early_stopping_metric([ER, F], [LE, LR])

                print('\nAverage score for {} {} data using {} coordinates'.format(score_type, 'fold' if score_type=='all' else split_key, 'Polar' if self._use_polar_format else 'Cartesian' ))
                print('SELD score (early stopping metric): {:0.2f}'.format(seld_scr))
                print('SED metrics: Error rate: {:0.2f}, F-score:{:0.1f}'.format(ER, 100*F))
                print('DOA metrics: Localization error: {:0.1f}, Localization Recall: {:0.1f}'.format(LE, 100*LR))
Пример #3
0
        def metric_dcase2020(gen, sed_pred, doa_pred):
            """
            Score SED/DOA predictions against the labels supplied by a data generator.

            :param gen: generator object providing all_label_sed_2d(), all_label_doa_2d()
                and the class count in _Ncat.
            :param sed_pred: SED predictions; shape[0] is used as the segment length.
            :param doa_pred: DOA predictions matching sed_pred.
            :return: tuple (new_metric, new_seld_metric) - the result of
                SELDMetrics.compute_seld_scores() and the early stopping metric computed
                from its first two and remaining entries.
            """
            # Reference labels come straight from the generator
            ref_sed = gen.all_label_sed_2d()
            ref_doa = gen.all_label_doa_2d()

            scorer = SELD_evaluation_metrics.SELDMetrics(
                nb_classes=gen._Ncat,
                doa_threshold=params['lad_doa_thresh'])

            # Convert predictions and references to the output format, then segment them
            predicted = feat_cls.regression_label_format_to_output_format(
                sed_pred, doa_pred)
            reference = feat_cls.regression_label_format_to_output_format(
                ref_sed, ref_doa)
            predicted_blocks = feat_cls.segment_labels(predicted, sed_pred.shape[0])
            reference_blocks = feat_cls.segment_labels(reference, ref_sed.shape[0])

            scorer.update_seld_scores_xyz(predicted_blocks, reference_blocks)
            scores = scorer.compute_seld_scores()

            # First two entries feed the SED side of the early stopping metric, the rest the DOA side
            combined = evaluation_metrics.early_stopping_metric(scores[:2], scores[2:])
            return scores, combined
Пример #4
0
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs. 
        first input: task_id - (optional) To chose the system configuration in parameters.py.
                                (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely represented with this.
                              (default) 1

    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('The code expected two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print(
            '\t\t<task-id> is used to choose the user-defined parameter set from parameter.py'
        )
        print('Using default inputs for now')
        print(
            '\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
            'You can use any number or string for this.')
        print(
            '-------------------------------------------------------------------------------------------------------'
        )
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    feat_cls = cls_feature_class.FeatureClass(params)
    train_splits, val_splits, test_splits = None, None, None

    if params['mode'] == 'dev':
        test_splits = [1]
        val_splits = [2]
        train_splits = [[3, 4, 5, 6]]

    elif params['mode'] == 'eval':
        test_splits = [[7, 8]]
        val_splits = [[1]]
        train_splits = [[2, 3, 4, 5, 6]]

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print(
            '\n\n---------------------------------------------------------------------------------------------------'
        )
        print(
            '------------------------------------      SPLIT {}   -----------------------------------------------'
            .format(split))
        print(
            '---------------------------------------------------------------------------------------------------'
        )

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id,
                                                   params['dataset'],
                                                   params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            params=params, split=train_splits[split_cnt])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            params=params, split=val_splits[split_cnt], shuffle=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(
            data_in, data_out))

        nb_classes = data_gen_train.get_nb_classes()
        gt = collect_test_labels(data_gen_val, data_out, nb_classes,
                                 params['quick_test'])
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
        doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

        print(
            'MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size{}, t_pool_size{}\n\trnn_size: {}, fnn_size: {}\n\tdoa_objective: {}\n'
            .format(params['dropout_rate'], params['nb_cnn2d_filt'],
                    params['f_pool_size'], params['t_pool_size'],
                    params['rnn_size'], params['fnn_size'],
                    params['doa_objective']))

        print('Using loss weights : {}'.format(params['loss_weights']))
        model = keras_model.get_model(data_in=data_in,
                                      data_out=data_out,
                                      dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'],
                                      f_pool_size=params['f_pool_size'],
                                      t_pool_size=params['t_pool_size'],
                                      rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'],
                                      doa_objective=params['doa_objective'])
        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        seld_metric = np.zeros(nb_epoch)
        new_seld_metric = np.zeros(nb_epoch)
        tr_loss = np.zeros(nb_epoch)
        doa_metric = np.zeros((nb_epoch, 6))
        sed_metric = np.zeros((nb_epoch, 2))
        new_metric = np.zeros((nb_epoch, 4))

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else
                data_gen_train.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2,
            )
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]

            # predict once per peoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else
                data_gen_val.get_total_batches_in_data(),
                verbose=2)

            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(
                pred[1]
                if params['doa_objective'] is 'mse' else pred[1][:, :,
                                                                 nb_classes:])

            # Calculate the DCASE 2019 metrics - Detection-only and Localization-only scores
            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_val.nb_frames_1s())
            doa_metric[
                epoch_cnt, :] = evaluation_metrics.compute_doa_scores_regr_xyz(
                    doa_pred, doa_gt, sed_pred, sed_gt)
            seld_metric[epoch_cnt] = evaluation_metrics.early_stopping_metric(
                sed_metric[epoch_cnt, :], doa_metric[epoch_cnt, :])

            # Calculate the DCASE 2020 metrics - Location-aware detection and Class-aware localization scores
            cls_new_metric = SELD_evaluation_metrics.SELDMetrics(
                nb_classes=data_gen_val.get_nb_classes(),
                doa_threshold=params['lad_doa_thresh'])
            pred_dict = feat_cls.regression_label_format_to_output_format(
                sed_pred, doa_pred)
            gt_dict = feat_cls.regression_label_format_to_output_format(
                sed_gt, doa_gt)

            pred_blocks_dict = feat_cls.segment_labels(pred_dict,
                                                       sed_pred.shape[0])
            gt_blocks_dict = feat_cls.segment_labels(gt_dict, sed_gt.shape[0])

            cls_new_metric.update_seld_scores_xyz(pred_blocks_dict,
                                                  gt_blocks_dict)
            new_metric[epoch_cnt, :] = cls_new_metric.compute_seld_scores()
            new_seld_metric[
                epoch_cnt] = evaluation_metrics.early_stopping_metric(
                    new_metric[epoch_cnt, :2], new_metric[epoch_cnt, 2:])

            # Visualize the metrics with respect to epochs
            plot_functions(unique_name, tr_loss, sed_metric, doa_metric,
                           seld_metric, new_metric, new_seld_metric)

            patience_cnt += 1
            if new_seld_metric[epoch_cnt] < best_seld_metric:
                best_seld_metric = new_seld_metric[epoch_cnt]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: {}, time: {:0.2f}s, tr_loss: {:0.2f}, '
                '\n\t\t DCASE2019 SCORES: ER: {:0.2f}, F: {:0.1f}, DE: {:0.1f}, FR:{:0.1f}, seld_score: {:0.2f}, '
                '\n\t\t DCASE2020 SCORES: ER: {:0.2f}, F: {:0.1f}, DE: {:0.1f}, DE_F:{:0.1f}, seld_score (early stopping score): {:0.2f}, '
                'best_seld_score: {:0.2f}, best_epoch : {}\n'.format(
                    epoch_cnt,
                    time.time() - start, tr_loss[epoch_cnt],
                    sed_metric[epoch_cnt, 0], sed_metric[epoch_cnt, 1] * 100,
                    doa_metric[epoch_cnt, 0], doa_metric[epoch_cnt, 1] * 100,
                    seld_metric[epoch_cnt], new_metric[epoch_cnt, 0],
                    new_metric[epoch_cnt, 1] * 100, new_metric[epoch_cnt, 2],
                    new_metric[epoch_cnt, 3] * 100, new_seld_metric[epoch_cnt],
                    best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break

        avg_scores_val.append([
            new_metric[best_epoch, 0], new_metric[best_epoch, 1],
            new_metric[best_epoch, 2], new_metric[best_epoch,
                                                  3], best_seld_metric
        ])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score (early stopping score) : {}'.format(
            best_seld_metric))

        print('\n\tDCASE2020 scores')
        print(
            '\tClass-aware localization scores: DOA_error: {:0.1f}, F-score: {:0.1f}'
            .format(new_metric[best_epoch, 2],
                    new_metric[best_epoch, 3] * 100))
        print(
            '\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'
            .format(new_metric[best_epoch, 0],
                    new_metric[best_epoch, 1] * 100))

        print('\n\tDCASE2019 scores')
        print(
            '\tLocalization-only scores: DOA_error: {:0.1f}, Frame recall: {:0.1f}'
            .format(doa_metric[best_epoch, 0],
                    doa_metric[best_epoch, 1] * 100))
        print(
            '\tDetection-only scores: Error rate: {:0.2f}, F-score: {:0.1f}\n'.
            format(sed_metric[best_epoch, 0], sed_metric[best_epoch, 1] * 100))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print(
            '\nLoading the best model and predicting results on the testing split'
        )
        print('\tLoading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            params=params,
            split=split,
            shuffle=False,
            per_file=params['dcase_output'],
            is_eval=True if params['mode'] is 'eval' else False)

        model = keras_model.load_seld_model('{}_model.h5'.format(unique_name),
                                            params['doa_objective'])
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else
            data_gen_test.get_total_batches_in_data(),
            verbose=2)

        test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
        test_doa_pred = evaluation_metrics.reshape_3Dto2D(
            pred_test[1]
            if params['doa_objective'] is 'mse' else pred_test[1][:, :,
                                                                  nb_classes:])

        if params['dcase_output']:
            # Dump results in DCASE output format for calculating final scores
            dcase_dump_folder = os.path.join(
                params['dcase_dir'],
                '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
            cls_feature_class.create_folder(dcase_dump_folder)
            print('Dumping recording-wise results in: {}'.format(
                dcase_dump_folder))

            test_filelist = data_gen_test.get_filelist()
            # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
            max_frames_with_content = data_gen_test.get_nb_frames()

            # Number of frames in one batch (batch_size* sequence_length) consists of all the 3000 frames above with
            # zero padding in the remaining frames
            frames_per_file = data_gen_test.get_frame_per_file()

            for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
                output_file = os.path.join(
                    dcase_dump_folder,
                    test_filelist[file_cnt].replace('.npy', '.csv'))
                dc = file_cnt * frames_per_file
                output_dict = feat_cls.regression_label_format_to_output_format(
                    test_sed_pred[dc:dc + max_frames_with_content, :],
                    test_doa_pred[dc:dc + max_frames_with_content, :])
                data_gen_test.write_output_format_file(output_file,
                                                       output_dict)

        if params['mode'] is 'dev':
            test_data_in, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels(data_gen_test, test_data_out,
                                          nb_classes, params['quick_test'])
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
            test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])

            # Calculate DCASE2019 scores
            test_sed_loss = evaluation_metrics.compute_sed_scores(
                test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            test_doa_loss = evaluation_metrics.compute_doa_scores_regr_xyz(
                test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
            test_metric_loss = evaluation_metrics.early_stopping_metric(
                test_sed_loss, test_doa_loss)

            # Calculate DCASE2020 scores
            cls_new_metric = SELD_evaluation_metrics.SELDMetrics(
                nb_classes=data_gen_test.get_nb_classes(), doa_threshold=20)
            test_pred_dict = feat_cls.regression_label_format_to_output_format(
                test_sed_pred, test_doa_pred)
            test_gt_dict = feat_cls.regression_label_format_to_output_format(
                test_sed_gt, test_doa_gt)

            test_pred_blocks_dict = feat_cls.segment_labels(
                test_pred_dict, test_sed_pred.shape[0])
            test_gt_blocks_dict = feat_cls.segment_labels(
                test_gt_dict, test_sed_gt.shape[0])

            cls_new_metric.update_seld_scores_xyz(test_pred_blocks_dict,
                                                  test_gt_blocks_dict)
            test_new_metric = cls_new_metric.compute_seld_scores()
            test_new_seld_metric = evaluation_metrics.early_stopping_metric(
                test_new_metric[:2], test_new_metric[2:])

            avg_scores_test.append([
                test_new_metric[0], test_new_metric[1], test_new_metric[2],
                test_new_metric[3], test_new_seld_metric
            ])
            print('Results on test split:')

            print('\tDCASE2020 Scores')
            print(
                '\tClass-aware localization scores: DOA Error: {:0.1f}, F-score: {:0.1f}'
                .format(test_new_metric[2], test_new_metric[3] * 100))
            print(
                '\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'
                .format(test_new_metric[0], test_new_metric[1] * 100))
            print('\tSELD (early stopping metric): {:0.2f}'.format(
                test_new_seld_metric))

            print('\n\tDCASE2019 Scores')
            print(
                '\tLocalization-only scores: DOA Error: {:0.1f}, Frame recall: {:0.1f}'
                .format(test_doa_loss[0], test_doa_loss[1] * 100))
            print(
                '\tDetection-only scores:Error rate: {:0.2f}, F-score: {:0.1f}'
                .format(test_sed_loss[0], test_sed_loss[1] * 100))
    print(
        '------------------------------------  {}   ---------------------------------------------'
        .format('Total score' if score_type ==
                'all' else 'score per {}'.format(score_type)))
    print(
        '---------------------------------------------------------------------------------------------------'
    )

    split_cnt_dict = get_nb_files(
        pred_files,
        _group=score_type)  # collect files corresponding to score_type
    # Calculate scores across files for a given score_type
    for split_key in np.sort(list(split_cnt_dict)):
        # Load evaluation metric class
        eval = SELD_evaluation_metrics.SELDMetrics(
            nb_classes=feat_cls.get_nb_classes(),
            doa_threshold=params['lad_doa_thresh'])
        for pred_cnt, pred_file in enumerate(split_cnt_dict[split_key]):
            # Load predicted output format file
            pred_dict = feat_cls.load_output_format_file(
                os.path.join(pred_output_format_files, pred_file))
            if use_polar_format:
                pred_dict_polar = feat_cls.convert_output_format_cartesian_to_polar(
                    pred_dict)
                pred_labels = feat_cls.segment_labels(pred_dict_polar,
                                                      feat_cls.get_nb_frames())
            else:
                pred_labels = feat_cls.segment_labels(pred_dict,
                                                      feat_cls.get_nb_frames())

            # Load reference description file