Example #1
    def metrics(self, list_dict, submissions_dir, metadata_dir):

        write_submission(list_dict, submissions_dir)
        prediction_paths = [
            os.path.join(submissions_dir, '{}.csv'.format(d['name']))
            for d in list_dict
        ]
        metrics = calculate_metrics(metadata_dir, prediction_paths)

        for key in metrics.keys():
            logging.info('    {:<20} {:.3f}'.format(key + ' :', metrics[key]))

        return metrics
Example #2
def optimize_sed_with_gd(output_dict, submission_path, reference_csv_path,
                         sed_params_dict, metric_type):
    """Optimize thresholds for SED.

    Args:
      output_dict: {'clipwise_output': (N, classes_num), 
                    'framewise_output': (N, frames_num, classes_num)}
      submission_path: str
      reference_csv_path: str
      sed_params_dict: dict
      metric_type: 'f1' | 'er'

    Returns:
      metric: float
      sed_params_dict: dict, optimized thresholds
    """
    predict_event_list = frame_prediction_to_event_prediction(
        output_dict, sed_params_dict)

    write_submission(predict_event_list, submission_path)
    results = official_evaluate(reference_csv_path, submission_path)
    metric = _get_metric(results, metric_type)
    print('Initial {}: {}'.format(metric_type, metric))
    print('Running optimization on thresholds.')

    opt = Adam()
    opt.alpha = 2e-2
    for i in range(10):
        grads = calculate_sed_gradient(output_dict, submission_path,
                                       reference_csv_path, sed_params_dict,
                                       metric_type)

        # F1 should be maximized, so negate its gradient; ER should be
        # minimized, so its gradient keeps its sign.
        if metric_type == 'f1':
            grads = [-e for e in grads]
        elif metric_type == 'er':
            pass

        params = sed_dict_to_params(sed_params_dict)
        sed_params = opt.GetNewParams(params, grads)
        sed_params_dict = sed_params_to_dict(sed_params, sed_params_dict)

        predict_event_list = frame_prediction_to_event_prediction(
            output_dict, sed_params_dict)

        write_submission(predict_event_list, submission_path)
        results = official_evaluate(reference_csv_path, submission_path)
        metric = _get_metric(results, metric_type)
        print('******')
        print('Iteration: {}, {}: {}'.format(i, metric_type, metric))

    return metric, sed_params_dict
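
A minimal driver sketch for optimize_sed_with_gd, assuming the helpers above are importable from a local module (the module name, paths and class count below are placeholders, not part of the original code):

import pickle

from sed_optimization import optimize_sed_with_gd  # hypothetical module name

classes_num = 10  # placeholder; use config.classes_num in practice

# Predictions saved by an earlier inference run (same pickled format as in Example #13).
output_dict = pickle.load(open('predictions.pkl', 'rb'))

# Initial per-class thresholds (same layout as in Example #14).
sed_params_dict = {
    'audio_tagging_threshold': [0.3] * classes_num,
    'sed_high_threshold': [0.3] * classes_num,
    'sed_low_threshold': [0.05] * classes_num,
    'n_smooth': [1] * classes_num,
    'n_salt': [1] * classes_num}

metric, opt_sed_params_dict = optimize_sed_with_gd(
    output_dict=output_dict,
    submission_path='_tmp_submission.csv',
    reference_csv_path='groundtruth_strong_label_testing_set.csv',
    sed_params_dict=sed_params_dict,
    metric_type='f1')

pickle.dump(opt_sed_params_dict, open('sed_f1_thresholds.pkl', 'wb'))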
Example #3
def calculate_sed_gradient(output_dict, submission_path, reference_csv_path,
                           sed_params_dict, metric_type):
    """Optimize thresholds for SED.

    Args:
      output_dict: {'clipwise_output': (N, classes_num), 
                    'framewise_output': (N, frames_num, classes_num)}
      submission_path: str
      reference_csv_path: str
      sed_params_dict: dict
      metric_type: 'f1' | 'er'

    Returns:
      grads: vector
    """
    predict_event_list = frame_prediction_to_event_prediction(
        output_dict, sed_params_dict)

    write_submission(predict_event_list, submission_path)
    results = official_evaluate(reference_csv_path, submission_path)
    value = _get_metric(results, metric_type)

    grads = []
    params = sed_dict_to_params(sed_params_dict)

    for k, param in enumerate(params):
        print('Param index: {} / {}'.format(k, len(params)))
        new_params = params.copy()
        delta = 0.1
        cnt = 0
        # The metric is piecewise constant in the thresholds, so enlarge the
        # perturbation (up to 3 * delta) until the metric value changes.
        while cnt < 3:
            cnt += 1
            new_params[k] += delta
            new_params_dict = sed_params_to_dict(new_params, sed_params_dict)

            predict_event_list = frame_prediction_to_event_prediction(
                output_dict, new_params_dict)

            write_submission(predict_event_list, submission_path)
            results = official_evaluate(reference_csv_path, submission_path)
            new_value = _get_metric(results, metric_type)

            if new_value != value:
                break

        grad = (new_value - value) / (delta * cnt)
        grads.append(grad)

    return grads
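
The loop above is a forward finite difference on a piecewise-constant objective: each threshold is nudged by delta up to three times until the metric actually changes, and the gradient estimate is the observed change divided by the total perturbation. A self-contained sketch of that scheme on a toy objective (function and variable names here are illustrative only):

import numpy as np


def finite_difference_grads(objective, params, delta=0.1, max_steps=3):
    """Forward finite differences for a piecewise-constant objective.

    The perturbation is enlarged (up to max_steps * delta) until the
    objective value changes, mirroring calculate_sed_gradient above.
    """
    base_value = objective(params)
    grads = []
    for k in range(len(params)):
        perturbed = list(params)
        steps = 0
        new_value = base_value
        while steps < max_steps:
            steps += 1
            perturbed[k] += delta
            new_value = objective(perturbed)
            if new_value != base_value:
                break
        grads.append((new_value - base_value) / (delta * steps))
    return grads


# Toy piecewise-constant objective: counts how many parameters exceed 0.25.
toy_objective = lambda p: float(np.sum(np.asarray(p) > 0.25))
print(finite_difference_grads(toy_objective, [0.2, 0.3]))  # e.g. [10.0, 0.0]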
Example #4
    def evaluate(self,
                 data_type,
                 metadata_dir,
                 submissions_dir,
                 max_validate_num=None):
        '''Evaluate the performance. 
        
        Args: 
          data_type: 'train' | 'validate'
          metadata_dir: string, directory of reference meta csvs
          submissions_dir: string, directory to write out submission csvs
          max_validate_num: None | int, maximum iteration to run to speed up 
              evaluation
        '''

        # Forward
        generate_func = self.data_generator.generate_validate(
            data_type=data_type, max_validate_num=max_validate_num)

        list_dict = forward(model=self.model,
                            generate_func=generate_func,
                            cuda=self.cuda,
                            return_target=True)

        # Calculate loss
        (total_loss, event_loss,
         position_loss) = self.calculate_loss(list_dict)

        logging.info('{:<20} {}: {:.3f}, {}: {:.3f}, {}: {:.3f}'
                     ''.format(data_type + ' statistics: ', 'total_loss',
                               total_loss, 'event_loss', event_loss,
                               'position_loss', position_loss))

        # Write out submission and evaluate using code provided by organizer
        write_submission(list_dict, submissions_dir)

        prediction_paths = [
            os.path.join(submissions_dir, '{}.csv'.format(d['name']))
            for d in list_dict
        ]

        statistics = calculate_metrics(metadata_dir, prediction_paths)

        for key in statistics.keys():
            logging.info('    {:<20} {:.3f}'.format(key + ' :',
                                                    statistics[key]))

        return statistics
Example #5
    def evaluate(self, data_loader, reference_csv_path, submission_path):
        """Evaluate AT and SED performance.

        Args:
          data_loader: object
          reference_csv_path: str, strongly labelled ground truth csv
          submission_path: str, path to write out submission file

        Returns:
          statistics: dict
          output_dict: dict
        """
        output_dict = forward(model=self.model,
                              data_loader=data_loader,
                              return_input=False,
                              return_target=True)

        statistics = {}

        # Clipwise statistics
        statistics['clipwise_ap'] = metrics.average_precision_score(
            output_dict['target'],
            output_dict['clipwise_output'],
            average=None)

        # Framewise statistics
        if 'strong_target' in output_dict.keys():
            statistics['framewise_ap'] = sed_average_precision(
                output_dict['strong_target'],
                output_dict['framewise_output'],
                average=None)

        # Framewise predictions to eventwise predictions
        predict_event_list = frame_prediction_to_event_prediction(
            output_dict, self.sed_params_dict)

        # Write eventwise predictions to submission file
        write_submission(predict_event_list, submission_path)

        # SED with official tool
        statistics['sed_metrics'] = official_evaluate(reference_csv_path,
                                                      submission_path)

        return statistics, output_dict
Example #6
    def evaluate(self, reference_csv_path, submission_path):
        """Evaluate AT and SED performance.

        Args:
          reference_csv_path: str, strongly labelled ground truth csv
          submission_path: str, path to write out submission file

        Returns:
          statistics: dict
          predictions: dict
        """
        output_dict = forward(
            model=self.model, 
            generator=self.generator, 
            return_input=False, 
            return_target=True)

        predictions = {'clipwise_output': output_dict['clipwise_output'], 
            'framewise_output': output_dict['framewise_output']}

        statistics = {}
        
        # Weak statistics
        clipwise_ap = metrics.average_precision_score(
            output_dict['target'], output_dict['clipwise_output'], average=None)
        statistics['clipwise_ap'] = clipwise_ap
        logging.info('    clipwise mAP: {:.3f}'.format(np.mean(clipwise_ap)))

        if 'strong_target' in output_dict.keys():
            framewise_ap = sed_average_precision(output_dict['strong_target'], 
                output_dict['framewise_output'], average=None)
            statistics['framewise_ap'] = framewise_ap
            logging.info('    framewise mAP: {:.3f}'.format(np.mean(framewise_ap)))
         
        # Obtain eventwise predictions from framewise predictions using predefined thresholds
        predict_event_list = frame_prediction_to_event_prediction(output_dict, 
            self.sed_params_dict)
        
        # Write predicted events to submission file
        write_submission(predict_event_list, submission_path)

        # SED with official tool
        results = official_evaluate(reference_csv_path, submission_path)
        logging.info('    {}'.format(results['overall']['error_rate']))
        statistics['sed_metrics'] = results

        return statistics, predictions
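
sed_average_precision is not defined in these snippets; a minimal sketch of framewise average precision, assuming it flattens the frame axis and delegates to sklearn (the same reshape appears in Example #14 below; the function name here is illustrative):

import numpy as np
from sklearn import metrics


def framewise_average_precision(strong_target, framewise_output, average=None):
    """Average precision over frames: (N, frames_num, classes_num) is
    flattened to (N * frames_num, classes_num) before scoring."""
    n, frames_num, classes_num = strong_target.shape
    return metrics.average_precision_score(
        strong_target.reshape(n * frames_num, classes_num),
        framewise_output.reshape(n * frames_num, classes_num),
        average=average)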
Example #7
    def __call__(self, params):
        """Use hyper parameters to threshold prediction to obtain output.
        Then, the scores are calculated between output and target.
        """
        params_dict = self.params_list_to_params_dict(params)
        # params_dict['n_smooth'] = 1
        # params_dict['n_salt'] = 1

        predict_event_list = frame_prediction_to_event_prediction(
            self.output_dict, params_dict)

        # Write predicted events to submission file
        write_submission(predict_event_list, self.submission_path)

        # SED with official tool
        results = official_evaluate(self.reference_csv_path,
                                    self.submission_path)

        f1 = results['overall']['f_measure']['f_measure']

        return f1
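
The callable objective above maps a flat parameter vector to an F1 score, so it can be plugged into any black-box optimizer. A sketch of a simple random-search driver for such an objective (the driver itself is hypothetical and not part of the original code):

import numpy as np


def random_search(objective, init_params, iterations=50, sigma=0.05, seed=0):
    """Keep the parameter vector with the best objective value found by
    Gaussian perturbation of the current best candidate."""
    rng = np.random.RandomState(seed)
    best_params = np.asarray(init_params, dtype=float)
    best_score = objective(best_params)
    for _ in range(iterations):
        candidate = best_params + sigma * rng.randn(len(best_params))
        score = objective(candidate)
        if score > best_score:
            best_score, best_params = score, candidate
    return best_score, best_params

# Usage (objective being an instance of the class that defines __call__ above):
# best_f1, best_params = random_search(objective, [0.3] * num_params)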
Example #8
def inference_evaluation(args):
    '''Inference on evaluation data and write out submission file. 
    
    Args: 
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'leaderboard' | 'evaluation'
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      iteration: int
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool
    '''
    # Arguments & parameters
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    holdout_fold = 'none'
    
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    
    in_domain_classes_num = len(config.labels) - 1
    
    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''
        
    sub_dir = get_subdir(subtask, data_type)
    trained_sub_dir = get_subdir(subtask, 'development')
    
    feature_hdf5_path = os.path.join(workspace, 'features', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))
        
    scalar_path = os.path.join(workspace, 'scalars', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(trained_sub_dir))
        
    checkpoint_path = os.path.join(workspace, 'checkpoints', filename, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(trained_sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type, '{}_iterations.pth'.format(iteration))
    
    submission_path = os.path.join(workspace, 'submissions', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        sub_dir, 'holdout_fold={}'.format(holdout_fold), model_type, 
        '{}_iterations'.format(iteration), 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)
        
    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)
    
    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
        loss_func = nll_loss
        
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy
        
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])
    
    if cuda:
        model.cuda()
        
    # Data generator
    data_generator = EvaluationDataGenerator(
        feature_hdf5_path=feature_hdf5_path, 
        scalar=scalar, 
        batch_size=batch_size)
    
    generate_func = data_generator.generate_evaluation(data_type)

    # Inference
    output_dict = forward(model, generate_func, cuda, return_input=False, 
        return_target=False)

    # Write submission
    write_submission(output_dict, subtask, data_type, submission_path)
Example #9
    def evaluate(self,
                 data_type,
                 metadata_path,
                 submission_path,
                 max_iteration=None):
        '''Write out submission file and evaluate the performance. 
        
        Args: 
          data_type: 'train' | 'validate'
          metadata_path: string, path of reference csv
          submission_path: string, path to write out submission
          max_iteration: None | int, maximum iteration to run to speed up 
              evaluation
        '''
        generate_func = self.data_generator.generate_validate(
            data_type=data_type, max_iteration=max_iteration)

        # Forward
        output_dict = forward(model=self.model,
                              generate_func=generate_func,
                              cuda=self.cuda,
                              return_target=True)

        # Evaluate audio tagging
        statistics = {}

        if 'weak_target' in output_dict:
            weak_target = output_dict['weak_target']
            clipwise_output = output_dict['clipwise_output']
            average_precision = metrics.average_precision_score(
                weak_target, clipwise_output, average=None)
            mAP = np.mean(average_precision)

            logging.info('{} statistics:'.format(data_type))
            logging.info('    Audio tagging mAP: {:.3f}'.format(mAP))

            # Only available when weak targets are provided
            statistics['average_precision'] = average_precision

        if 'strong_target' in output_dict:
            # Write out submission file
            write_submission(output_dict, self.sed_params_dict,
                             submission_path)

            # Evaluate SED with official tools
            reference_dict = read_csv_file_for_sed_eval_tool(metadata_path)
            predict_dict = read_csv_file_for_sed_eval_tool(submission_path)

            # Event & segment based metrics
            event_based_metric = sed_eval.sound_event.EventBasedMetrics(
                event_label_list=config.labels,
                evaluate_onset=True,
                evaluate_offset=True,
                t_collar=0.200,
                percentage_of_length=0.2)

            segment_based_metric = sed_eval.sound_event.SegmentBasedMetrics(
                event_label_list=config.labels, time_resolution=0.2)

            for audio_name in output_dict['audio_name']:
                if audio_name in reference_dict.keys():
                    ref_list = reference_dict[audio_name]
                else:
                    ref_list = []

                if audio_name in predict_dict.keys():
                    pred_list = predict_dict[audio_name]
                else:
                    pred_list = []

                event_based_metric.evaluate(ref_list, pred_list)
                segment_based_metric.evaluate(ref_list, pred_list)

            event_metrics = event_based_metric.results_class_wise_average_metrics()
            f_measure = event_metrics['f_measure']['f_measure']
            error_rate = event_metrics['error_rate']['error_rate']
            deletion_rate = event_metrics['error_rate']['deletion_rate']
            insertion_rate = event_metrics['error_rate']['insertion_rate']

            statistics['event_metrics'] = {
                'f_measure': f_measure,
                'error_rate': error_rate,
                'deletion_rate': deletion_rate,
                'insertion_rate': insertion_rate
            }

            logging.info('    Event-based, classwise F score: {:.3f}, ER: '
                         '{:.3f}, Del: {:.3f}, Ins: {:.3f}'.format(
                             f_measure, error_rate, deletion_rate,
                             insertion_rate))

            segment_metrics = segment_based_metric.results_class_wise_average_metrics()
            f_measure = segment_metrics['f_measure']['f_measure']
            error_rate = segment_metrics['error_rate']['error_rate']
            deletion_rate = segment_metrics['error_rate']['deletion_rate']
            insertion_rate = segment_metrics['error_rate']['insertion_rate']

            statistics['segment_metrics'] = {
                'f_measure': f_measure,
                'error_rate': error_rate,
                'deletion_rate': deletion_rate,
                'insertion_rate': insertion_rate
            }

            logging.info('    Segment based, classwise F score: {:.3f}, ER: '
                         '{:.3f}, Del: {:.3f}, Ins: {:.3f}'.format(
                             f_measure, error_rate, deletion_rate,
                             insertion_rate))

            if self.verbose:
                logging.info(event_based_metric)
                logging.info(segment_based_metric)

        return statistics
Example #10
        model.load_weights(checkpoint_path)

    summary_path = os.path.join(SUMMARY_PATH, 'model_{}'.format(num_folds))
    mkdirp(summary_path)

    callbacks = [EarlyStopping(monitor='val_loss', patience=1, verbose=0, mode='auto'),
                 ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=0, save_best_only=True, mode='auto'),
                 TensorBoard(log_dir=summary_path, histogram_freq=0)]
    model.fit(X_train, y_train, batch_size=BATCH_SIZE, nb_epoch=NB_EPOCHS,
              shuffle=True,
              verbose=1,
              validation_data=(X_valid, y_valid),
              callbacks=callbacks)

    predictions_valid = model.predict(X_valid, batch_size=100, verbose=1)
    score_valid = log_loss(y_valid, predictions_valid)
    scores_total.append(score_valid)

    print('Score: {}'.format(score_valid))

    predictions_test = model.predict(X_test, batch_size=100, verbose=1)
    predictions_total.append(predictions_test)

    num_folds += 1

score_geom = calc_geom(scores_total, MAX_FOLDS)
predictions_geom = calc_geom_arr(predictions_total, MAX_FOLDS)

submission_path = os.path.join(SUMMARY_PATH, 'submission_{}_{:.2}.csv'.format(int(time.time()), score_geom))
write_submission(predictions_geom, X_test_ids, submission_path)
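
calc_geom and calc_geom_arr are not shown in these snippets; a plausible minimal sketch, assuming they take the geometric mean of the scalar fold scores and the element-wise geometric mean of the per-fold prediction arrays (names suffixed with _sketch to mark them as assumptions):

import numpy as np


def calc_geom_sketch(scores, max_folds):
    """Geometric mean of the first max_folds scalar fold scores (assumed behaviour)."""
    scores = np.asarray(scores[:max_folds], dtype=float)
    return float(np.exp(np.mean(np.log(scores))))


def calc_geom_arr_sketch(predictions, max_folds):
    """Element-wise geometric mean of the first max_folds prediction arrays
    (assumed behaviour); clipping avoids log(0)."""
    stacked = np.stack(predictions[:max_folds]).astype(float)
    return np.exp(np.mean(np.log(np.clip(stacked, 1e-15, None)), axis=0))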
Example #11
                wait += 1
                print(
                    'Validation loss did not improve for {}/{} epochs.'.format(
                        wait, patience))

            if wait >= patience:
                print(
                    'Stopping early. Validation loss did not improve for {}/{} epochs.'
                    .format(wait, patience))
                break

        model.summary_writer.close()
        scores_total.append(score)

        print('Begin evaluation...')
        predictions = model.evaluate(X_test)
        predictions_total.append(predictions)

    num_folds += 1

score_geom = calc_geom(scores_total, num_folds)
predictions_geom = calc_geom_arr(predictions_total, num_folds)

print('Writing submission for {} folds, score: {}...'.format(
    num_folds, score_geom))
submission_dest = os.path.join(
    SUMMARY_PATH, 'submission_{}_{}.csv'.format(int(time.time()), score_geom))
write_submission(predictions_geom, X_test_ids, submission_dest)

print('Done.')
Example #12
def inference_test(args):
    '''Inference and calculate metrics on validation data. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      train_source: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration of audio recordings to be padded or split
      hop_seconds: float, hop seconds between segments
      pad_type: 'constant' | 'repeat'
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      iteration: int, load model of this iteration
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool, visualize the logmel spectrogram of segments
    '''

    # Arguments & parameters
    dataset_dir = DATASET_DIR
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    resume = args.resume
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    holdout_fold = args.holdout_fold  # Use model trained on full data without validation
    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    test_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'test.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    if not resume:
        checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename,
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'
            ''.format(segment_seconds, hop_seconds,
                      pad_type), 'holdout_fold={}'
            ''.format(holdout_fold), model_type,
            '{}_iterations.pth'.format(iteration))

        submission_path = os.path.join(
            workspace, 'submissions', filename,
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'
            ''.format(segment_seconds, hop_seconds,
                      pad_type), 'holdout_fold={}'
            ''.format(holdout_fold), model_type, '{}_iterations_submission.csv'
            ''.format(iteration))
        create_folder(os.path.dirname(submission_path))
    else:
        checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename,
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'
            ''.format(segment_seconds, hop_seconds,
                      pad_type), 'holdout_fold={}'
            ''.format(holdout_fold), model_type, 'resume',
            '{}_iterations.pth'.format(iteration))

        submission_path = os.path.join(
            workspace, 'submissions', filename,
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'
            ''.format(segment_seconds, hop_seconds,
                      pad_type), 'holdout_fold={}'
            ''.format(holdout_fold), model_type, 'resume',
            '{}_iterations_submission.csv'
            ''.format(iteration))
        create_folder(os.path.dirname(submission_path))

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = TestDataGenerator(
        test_feature_hdf5_path=test_feature_hdf5_path,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    generate_func = data_generator.generate_test()

    # Results of segments
    output_dict = forward_infer(model=model,
                                generate_func=generate_func,
                                cuda=cuda)

    # Results of audio recordings
    result_dict = segment_prediction_to_clip_prediction(output_dict,
                                                        average='arithmetic')

    # Write submission
    write_submission(result_dict, submission_path)
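
segment_prediction_to_clip_prediction is not defined in these snippets; a minimal sketch, assuming it aggregates per-segment probabilities that share an audio name with an arithmetic (or geometric) average (the dictionary keys used here are assumptions):

import numpy as np


def segment_to_clip_sketch(output_dict, average='arithmetic'):
    """Aggregate per-segment predictions that belong to the same audio clip."""
    clip_probs = {}
    for name, prob in zip(output_dict['audio_name'], output_dict['output']):
        clip_probs.setdefault(name, []).append(prob)

    if average == 'arithmetic':
        aggregate = lambda x: np.mean(x, axis=0)
    else:  # geometric
        aggregate = lambda x: np.exp(
            np.mean(np.log(np.clip(x, 1e-15, None)), axis=0))

    audio_names = sorted(clip_probs.keys())
    return {'audio_name': audio_names,
            'output': np.stack([aggregate(clip_probs[name])
                                for name in audio_names])}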
Example #13
def calculate_metrics(args):
    """Calculate metrics.

    Args:
      dataset_dir: str
      workspace: str
      holdout_fold: '1'
      model_type: str, e.g., 'Cnn_9layers_Gru_FrameAtt'
      loss_type: str, e.g., 'clip_bce'
      augmentation: str, e.g., 'mixup'
      batch_size: int
      iteration: int
      data_type: 'test' | 'evaluate'
      at_thresholds: bool
      sed_thresholds: bool
    """
    
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    filename = args.filename
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    loss_type = args.loss_type
    augmentation = args.augmentation
    batch_size = args.batch_size
    iteration = args.iteration
    data_type = args.data_type
    at_thresholds = args.at_thresholds
    sed_thresholds = args.sed_thresholds

    classes_num = config.classes_num

    # Paths
    if data_type == 'test':
        reference_csv_path = os.path.join(dataset_dir, 'metadata', 
            'groundtruth_strong_label_testing_set.csv')
    
    elif data_type == 'evaluate':
        reference_csv_path = os.path.join(dataset_dir, 'metadata', 
            'groundtruth_strong_label_evaluation_set.csv')
        
    prediction_path = os.path.join(workspace, 'predictions', 
        '{}'.format(filename), 'holdout_fold={}'.format(holdout_fold), 
        'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type), 
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size),
        '{}_iterations.prediction.{}.pkl'.format(iteration, data_type))
    
    tmp_submission_path = os.path.join(workspace, '_tmp_submission', 
        '{}'.format(filename), 'holdout_fold={}'.format(holdout_fold), 
        'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type), 
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size),
        '_submission.csv')

    # Load thresholds
    if at_thresholds:
        at_thresholds_path = os.path.join(workspace, 'opt_thresholds', 
            '{}'.format(filename), 'holdout_fold={}'.format(holdout_fold), 
            'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type), 
            'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size),
            '{}_iterations.at.test.pkl'.format(iteration))
        at_thresholds = pickle.load(open(at_thresholds_path, 'rb'))
    else:
        at_thresholds = [0.3] * classes_num

    if sed_thresholds:
        sed_thresholds_path = os.path.join(workspace, 'opt_thresholds', 
            '{}'.format(filename), 'holdout_fold={}'.format(holdout_fold), 
            'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type), 
            'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size),
            '{}_iterations.sed.test.pkl'.format(iteration))
        sed_thresholds = pickle.load(open(sed_thresholds_path, 'rb'))
    else:
        sed_thresholds = {
            'audio_tagging_threshold': 0.5, 
            'sed_high_threshold': 0.3, 
            'sed_low_threshold': 0.1, 
            'n_smooth': 10, 
            'n_salt': 10}

    # Load predictions
    output_dict = pickle.load(open(prediction_path, 'rb'))

    print('------ Audio tagging results ------')
    # Macro mAP
    mAP = metrics.average_precision_score(output_dict['target'], 
        output_dict['clipwise_output'], average='macro')
    
    # Micro precision, recall, F1
    (precision, recall, f1) = calculate_precision_recall_f1(
        output_dict['target'], output_dict['clipwise_output'], 
        thresholds=at_thresholds)

    print('Macro mAP: {:.3f}'.format(mAP))
    print('Micro precision: {:.3f}'.format(precision))
    print('Micro recall: {:.3f}'.format(recall))
    print('Micro F1: {:.3f}'.format(f1))

    print('------ Sound event detection ------')

    predict_event_list = frame_prediction_to_event_prediction(output_dict, 
        sed_thresholds)

    # Write predicted events to submission file
    write_submission(predict_event_list, tmp_submission_path)

    # SED with official tool
    results = official_evaluate(reference_csv_path, tmp_submission_path)
    
    sed_precision = get_metric(results, 'precision')
    sed_recall = get_metric(results, 'recall')
    sed_f1 = get_metric(results, 'f1')
    sed_er = get_metric(results, 'er')

    print('Micro precision: {:.3f}'.format(sed_precision))
    print('Micro recall: {:.3f}'.format(sed_recall))
    print('Micro F1: {:.3f}'.format(sed_f1))
    print('Micro ER: {:.3f}'.format(sed_er))
Example #14
def calculate_metrics(args):
    """Calculate metrics with optimized thresholds
    """

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    freeze_base = args.freeze_base
    loss_type = args.loss_type
    augmentation = args.augmentation
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    few_shots = args.few_shots
    random_seed = args.random_seed
    iteration = args.iteration
    filename = args.filename
    mini_data = False
    pretrain = False

    classes_num = config.classes_num

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    test_hdf5_path = os.path.join(workspace, 'features',
                                  '{}testing.waveform.h5'.format(prefix))

    evaluate_hdf5_path = os.path.join(workspace, 'features',
                                      '{}evaluation.waveform.h5'.format(prefix))

    test_reference_csv_path = os.path.join(
        dataset_dir, 'metadata', 'groundtruth_strong_label_testing_set.csv')

    evaluate_reference_csv_path = os.path.join(
        dataset_dir, 'metadata', 'groundtruth_strong_label_evaluation_set.csv')

    predictions_dir = os.path.join(workspace, 'predictions', '{}{}'.format(
        prefix, filename), 'holdout_fold={}'.format(holdout_fold), model_type,
                                   'pretrain={}'.format(pretrain),
                                   'loss_type={}'.format(loss_type),
                                   'augmentation={}'.format(augmentation),
                                   'few_shots={}'.format(few_shots),
                                   'random_seed={}'.format(random_seed),
                                   'freeze_base={}'.format(freeze_base),
                                   'batch_size={}'.format(batch_size))

    tmp_submission_path = os.path.join(
        workspace, '_tmp_submission', '{}{}'.format(prefix, filename),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size), 'few_shots={}'.format(few_shots),
        'random_seed={}'.format(random_seed),
        'freeze_base={}'.format(freeze_base), '_submission.csv')

    post_processing_params_dir = os.path.join(
        workspace, 'post_processing_params', '{}{}'.format(prefix, filename),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size), 'few_shots={}'.format(few_shots),
        'random_seed={}'.format(random_seed),
        'freeze_base={}'.format(freeze_base))

    t1 = time.time()

    # Calculate metrics for AT
    if True:
        print('------ AT ------')

        # Load auto thresholds
        post_processing_params_path = os.path.join(post_processing_params_dir,
                                                   'at_f1.npy')
        auto_thresholds = cPickle.load(open(post_processing_params_path, 'rb'))

        average = 'micro'

        # ------ Test metrics ------
        # Paths
        prediction_path = os.path.join(
            predictions_dir,
            '{}_iterations.prediction.test.h5'.format(iteration))

        # Load ground truth weak target
        with h5py.File(test_hdf5_path, 'r') as hf:
            weak_target = hf['weak_target'][:].astype(np.float32)

        # Load prediction probability
        (clipwise_prediction,
         framewise_prediction) = load_prediction(prediction_path)

        # Macro mAP
        mAP = metrics.average_precision_score(weak_target,
                                              clipwise_prediction,
                                              average='macro')
        print('test macro mAP: {:.3f}'.format(mAP))

        # Metrics without thresholds optimization
        manual_thres_f1 = calculate_f1(weak_target,
                                       clipwise_prediction,
                                       thresholds=[0.3] * classes_num,
                                       average=average)

        manual_thres_prec, manual_thres_recall = calculate_precision_recall(
            weak_target,
            clipwise_prediction,
            thresholds=[0.3] * classes_num,
            average=average)

        print('(no_opt_thres) test f1: {:.3f}, prec: {:.3f}, recall: {:.3f}'.
              format(manual_thres_f1, manual_thres_prec, manual_thres_recall))

        # Metrics with thresholds optimization
        auto_thres_f1 = calculate_f1(weak_target,
                                     clipwise_prediction,
                                     thresholds=auto_thresholds,
                                     average=average)
        auto_thres_prec, auto_thres_recall = calculate_precision_recall(
            weak_target,
            clipwise_prediction,
            thresholds=auto_thresholds,
            average=average)

        print('(opt_thres)    test f1: {:.3f}, prec: {:.3f}, recall: {:.3f}'.
              format(auto_thres_f1, auto_thres_prec, auto_thres_recall))

        # ------ Evaluate metrics ------
        # Paths
        prediction_path = os.path.join(
            predictions_dir,
            '{}_iterations.prediction.{}.h5'.format(iteration, 'evaluate'))

        # Load ground truth weak target
        with h5py.File(evaluate_hdf5_path, 'r') as hf:
            weak_target = hf['weak_target'][:].astype(np.float32)

        # Load prediction probability
        (clipwise_prediction,
         framewise_prediction) = load_prediction(prediction_path)

        # Macro mAP
        mAP = metrics.average_precision_score(weak_target,
                                              clipwise_prediction,
                                              average='macro')
        print('evaluate macro mAP: {:.3f}'.format(mAP))

        # Metrics without thresholds optimization
        manual_thres_f1 = calculate_f1(weak_target,
                                       clipwise_prediction,
                                       thresholds=[0.3] * classes_num,
                                       average=average)
        manual_thres_prec, manual_thres_recall = calculate_precision_recall(
            weak_target,
            clipwise_prediction,
            thresholds=[0.3] * classes_num,
            average=average)

        print(
            '(no_opt_thres) evaluate f1: {:.3f}, prec: {:.3f}, recall: {:.3f}'.
            format(manual_thres_f1, manual_thres_prec, manual_thres_recall))

        # Metrics with thresholds optimization
        auto_thres_f1 = calculate_f1(weak_target,
                                     clipwise_prediction,
                                     auto_thresholds,
                                     average=average)
        auto_thres_prec, auto_thres_recall = calculate_precision_recall(
            weak_target,
            clipwise_prediction,
            thresholds=auto_thresholds,
            average=average)

        print(
            '(opt_thres)    evaluate f1: {:.3f}, prec: {:.3f}, recall: {:.3f}'.
            format(auto_thres_f1, auto_thres_prec, auto_thres_recall))
        print()

    # Calculate metrics for SED
    if True:
        print('------ SED ------')

        # Initial thresholds for SED
        sed_params_dict = {
            'audio_tagging_threshold': [0.3] * classes_num,
            'sed_high_threshold': [0.3] * classes_num,
            'sed_low_threshold': [0.05] * classes_num,
            'n_smooth': [1] * classes_num,
            'n_salt': [1] * classes_num
        }

        for metric_idx, metric_type in enumerate(['f1', 'er']):
            print('*** Metric type: {} ***'.format(metric_type))

            # Load optimized thresholds
            post_processing_params_path = os.path.join(
                post_processing_params_dir, 'sed_{}.npy'.format(metric_type))

            auto_sed_params_dict = cPickle.load(
                open(post_processing_params_path, 'rb'))

            # ------ Test ------
            # Paths
            prediction_path = os.path.join(
                predictions_dir,
                '{}_iterations.prediction.test.h5'.format(iteration))

            # Load ground truth strong target
            with h5py.File(test_hdf5_path, 'r') as hf:
                audio_name = [name.decode() for name in hf['audio_name'][:]]
                strong_target = hf['strong_target'][:].astype(np.float32)

            # Load prediction probability
            (clipwise_prediction,
             framewise_prediction) = load_prediction(prediction_path)

            output_dict = {
                'audio_name': audio_name,
                'clipwise_output': clipwise_prediction,
                'framewise_output': framewise_prediction
            }

            # Macro framewise mAP
            if metric_idx == 0:
                mAP = metrics.average_precision_score(
                    strong_target.reshape(
                        (strong_target.shape[0] * strong_target.shape[1],
                         strong_target.shape[2])),
                    framewise_prediction.reshape(
                        (framewise_prediction.shape[0] *
                         framewise_prediction.shape[1],
                         framewise_prediction.shape[2])),
                    average='macro')

                print('test macro mAP: {:.3f}'.format(mAP))

            # Eventwise prediction without thresholds optimization
            predict_event_list = frame_prediction_to_event_prediction(
                output_dict, sed_params_dict)
            write_submission(predict_event_list, tmp_submission_path)
            results = official_evaluate(test_reference_csv_path,
                                        tmp_submission_path)

            metric = _get_metric(results, metric_type)
            print('(no_opt_thres) test {}: {:.3f}'.format(metric_type, metric))

            # Eventwise prediction with thresholds optimization
            predict_event_list = frame_prediction_to_event_prediction(
                output_dict, auto_sed_params_dict)
            write_submission(predict_event_list, tmp_submission_path)
            results = official_evaluate(test_reference_csv_path,
                                        tmp_submission_path)
            metric = _get_metric(results, metric_type)
            print('(opt_thres)    test {}: {:.3f}'.format(metric_type, metric))

            # ------ Evaluate ------
            # Paths
            prediction_path = os.path.join(
                predictions_dir,
                '{}_iterations.prediction.evaluate.h5'.format(iteration))

            # Load ground truth strong target
            with h5py.File(evaluate_hdf5_path, 'r') as hf:
                audio_name = [name.decode() for name in hf['audio_name'][:]]
                strong_target = hf['strong_target'][:].astype(np.float32)

            # Load prediction probability
            (clipwise_prediction,
             framewise_prediction) = load_prediction(prediction_path)

            output_dict = {
                'audio_name': audio_name,
                'clipwise_output': clipwise_prediction,
                'framewise_output': framewise_prediction
            }

            # Macro framewise mAP
            if metric_idx == 0:
                mAP = metrics.average_precision_score(
                    strong_target.reshape(
                        (strong_target.shape[0] * strong_target.shape[1],
                         strong_target.shape[2])),
                    framewise_prediction.reshape(
                        (framewise_prediction.shape[0] *
                         framewise_prediction.shape[1],
                         framewise_prediction.shape[2])),
                    average='macro')

                print('evaluate macro mAP: {:.3f}'.format(mAP))

            # Eventwise prediction without thresholds optimization
            predict_event_list = frame_prediction_to_event_prediction(
                output_dict, sed_params_dict)
            write_submission(predict_event_list, tmp_submission_path)
            results = official_evaluate(evaluate_reference_csv_path,
                                        tmp_submission_path)
            value = _get_metric(results, metric_type)
            print('(no_opt_thres) evaluate {}: {:.3f}'.format(
                metric_type, value))

            # Metrics with thresholds optimization
            predict_event_list = frame_prediction_to_event_prediction(
                output_dict, auto_sed_params_dict)
            write_submission(predict_event_list, tmp_submission_path)
            results = official_evaluate(evaluate_reference_csv_path,
                                        tmp_submission_path)
            value = _get_metric(results, metric_type)
            print('(opt_thres)    evaluate {}: {:.3f}'.format(
                metric_type, value))
            print()

        print('time: {:.3f} s'.format(time.time() - t1))