def vad_metrics(predictions,
                reference_segments,
                sr=22050,
                # NOTE: default window/hop lengths assume sr=22050
                # (32 ms window, 16 ms hop); pass matching values when
                # using a different sampling rate.
                window_length=int(np.floor(0.032 * 22050)),
                hop_length=int(np.floor(0.016 * 22050))):
    frame_times = librosa.frames_to_time(range(len(predictions)),
                                         sr=sr,
                                         hop_length=hop_length,
                                         n_fft=window_length)
    predicted_segments = voice_segments(predictions, frame_times)

    hypothesis = Annotation()
    for seg in predicted_segments:
        hypothesis[Segment(seg[0], seg[1])] = 1

    reference = Annotation()
    for seg in reference_segments:
        reference[Segment(seg[0], seg[1])] = 1

    precision = DetectionPrecision()(reference, hypothesis)
    error = DetectionErrorRate()(reference, hypothesis)
    recall = DetectionRecall()(reference, hypothesis)
    accuracy = DetectionAccuracy()(reference, hypothesis)

    metrics = {
        "precision": precision,
        "error": error,
        "recall": recall,
        "accuracy": accuracy
    }

    print(metrics)

    return metrics
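A minimal sketch of the imports the snippet above appears to rely on; the voice_segments() helper (assumed to turn frame-level predictions plus frame times into (start, end) pairs) is not shown in the original and is left as an assumption.

import numpy as np
import librosa
from pyannote.core import Annotation, Segment
from pyannote.metrics.detection import (DetectionAccuracy, DetectionErrorRate,
                                        DetectionPrecision, DetectionRecall)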
def get_detection_metrics(reference, hypothesis, uem=None):
    metric_dict = {}
    for metric in (DetectionErrorRate(), DetectionAccuracy(),
                   DetectionPrecision(), DetectionRecall()):
        metric_dict[metric.metric_name()] = metric(reference, hypothesis, uem=uem)

    return metric_dict
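A hypothetical usage sketch for get_detection_metrics(), assuming reference and hypothesis are pyannote.core.Annotation objects and uem, if given, is a pyannote.core.Timeline restricting the scored region:

from pyannote.core import Annotation, Segment, Timeline

reference = Annotation()
reference[Segment(0.0, 5.0)] = 'speech'

hypothesis = Annotation()
hypothesis[Segment(0.5, 5.0)] = 'speech'

scores = get_detection_metrics(reference, hypothesis,
                               uem=Timeline([Segment(0.0, 10.0)]))
print(scores)  # keys are metric names, e.g. 'detection error rate'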
    def get_metric(
        self, parallel=False
    ) -> Union[DetectionErrorRate, DetectionPrecisionRecallFMeasure]:
        """Return new instance of detection metric"""

        if self.fscore:
            return DetectionPrecisionRecallFMeasure(
                collar=0.0, skip_overlap=False, parallel=parallel
            )
        else:
            return DetectionErrorRate(collar=0.0, skip_overlap=False, parallel=parallel)
Example No. 4
def test_detailed(reference, hypothesis):
    detectionErrorRate = DetectionErrorRate()
    details = detectionErrorRate(reference, hypothesis, detailed=True)

    rate = details['detection error rate']
    npt.assert_almost_equal(rate, 0.3125, decimal=7)

    false_alarm = details['false alarm']
    npt.assert_almost_equal(false_alarm, 3.0, decimal=7)

    missed_detection = details['miss']
    npt.assert_almost_equal(missed_detection, 2.0, decimal=7)

    total = details['total']
    npt.assert_almost_equal(total, 16.0, decimal=7)
def detection(protocol, subset, hypotheses, collar=0.0, skip_overlap=False):

    options = {
        'collar': collar,
        'skip_overlap': skip_overlap,
        'parallel': True
    }

    metrics = {
        'error': DetectionErrorRate(**options),
        'accuracy': DetectionAccuracy(**options),
        'precision': DetectionPrecision(**options),
        'recall': DetectionRecall(**options)
    }

    reports = get_reports(protocol, subset, hypotheses, metrics)

    report = metrics['error'].report(display=False)
    accuracy = metrics['accuracy'].report(display=False)
    precision = metrics['precision'].report(display=False)
    recall = metrics['recall'].report(display=False)

    report['accuracy', '%'] = accuracy[metrics['accuracy'].name, '%']
    report['precision', '%'] = precision[metrics['precision'].name, '%']
    report['recall', '%'] = recall[metrics['recall'].name, '%']

    report = reindex(report)

    columns = list(report.columns)
    report = report[[columns[0]] + columns[-3:] + columns[1:-3]]

    summary = 'Detection (collar = {0:g} ms{1})'.format(
        1000 * collar, ', no overlap' if skip_overlap else '')

    headers = [summary] + \
              [report.columns[i][0] for i in range(4)] + \
              ['%' if c[1] == '%' else c[0] for c in report.columns[4:]]

    print(
        tabulate(report,
                 headers=headers,
                 tablefmt="simple",
                 floatfmt=".2f",
                 numalign="decimal",
                 stralign="left",
                 missingval="",
                 showindex="default",
                 disable_numparse=False))
Example No. 6
def SADError(segments, AudioDataSet, annotationlist, audioLength):
    # Build the reference annotation from the first four transcriber XML files.
    reference = Annotation()
    for annotation_file in annotationlist[:4]:
        tree = ET.parse(annotation_file)
        root = tree.getroot()
        for child in root.findall('segment'):
            start = float(child.get('transcriber_start'))
            end = float(child.get('transcriber_end'))
            reference[Segment(start, end)] = 'A'

    hypothesis = Annotation()
    for seg in segments:
        start = seg[0]
        end = seg[1]
        hypothesis[Segment(start, end)] = 'A'

    metric = DetectionErrorRate()
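    # The UEM timeline below restricts scoring to the annotated extent of the
    # recording, i.e. from 0 to audioLength seconds.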
    uem = Timeline([Segment(0, audioLength)])
    print('SAD Error Rate: %.2f %%' %
          (metric(reference, hypothesis, uem=uem) * 100))

    return metric, reference, hypothesis
Example No. 7
        def fun(threshold):

            binarizer = Binarize(onset=threshold,
                                 offset=threshold,
                                 log_scale=False)

            protocol = get_protocol(protocol_name, progress=False,
                                    preprocessors=self.preprocessors_)

            metric = DetectionErrorRate()

            # NOTE -- embarrassingly parallel
            # TODO -- parallelize this
            file_generator = getattr(protocol, subset)()
            for current_file in file_generator:

                uri = get_unique_identifier(current_file)
                hypothesis = binarizer.apply(
                    predictions[uri], dimension=0).to_annotation()
                reference = current_file['annotation']
                uem = get_annotated(current_file)
                _ = metric(reference, hypothesis, uem=uem)
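                # each call to metric(...) accumulates per-file counts; the
                # aggregate over all files is retrieved below via abs(metric)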

            return abs(metric)
Example No. 8
def main():
    parser = argparse.ArgumentParser(
        description='score SAD output', add_help=True)
    parser.add_argument(
        'ref_segments', metavar='ref-segments', type=Path,
        help='path to reference segments file')
    parser.add_argument(
        'sys_segments', metavar='sys-segments', type=Path,
        help='path to system segments file')
    parser.add_argument(
        'recordings_table', metavar='recordings-table', type=Path,
        help='path to recordings table')
    parser.add_argument(
        '-u', '--uem', metavar='FILE', default=None, type=Path, dest='uem_path',
        help='un-partitioned evaluation map file (default: %(default)s)')
    parser.add_argument(
        '--collar', nargs=None, default=0.0, type=float, metavar='DUR',
        help='collar size in seconds (default: %(default)s)')
    parser.add_argument(
        '--precision', nargs=None, default=2, type=int, metavar='DIGITS',
        help='number of decimal places to print (default: %(default)s)')
    parser.add_argument(
        '--table-format', nargs=None, default='simple',
        metavar='FMT',
        help='tabulate table format (default: %(default)s)')
    parser.add_argument(
        '--n-jobs', nargs=None, default=1, type=int, metavar='JOBS',
        help='number of parallel jobs to run (default: %(default)s)')
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()

    # Load annotations, scoring regions, etc.
    ref_annotations = read_segments_file(args.ref_segments)
    sys_annotations = read_segments_file(args.sys_segments)
    annotated = {}
    if args.uem_path:
        annotated = read_uem_file(args.uem_path)

        ref_annotations = {uri: ann for uri, ann in ref_annotations.items()
                           if uri in annotated}
        sys_annotations = {uri: ann for uri, ann in sys_annotations.items()
                           if uri in annotated}
    recordings = Recording.annotations_to_recordings(
        ref_annotations, sys_annotations, annotated=annotated)
    domains = load_domains(args.recordings_table)

    # Score in parallel.
    collar = 2*args.collar # Accommodation for how pyannote defines collar.
    kwargs = {
        'collar' : collar,
        'skip_overlap' : False,
        'parallel' : False}
    metrics = {
        'dcf' : DetectionCostFunction(
            fa_weight=0.25, miss_weight=0.75, **kwargs),
        'der' : DetectionErrorRate(**kwargs)}
    per_domain_metrics = score_recordings(
        recordings, metrics, domains=domains, n_jobs=args.n_jobs)

    # Report metrics as table on STDOUT.
    domain_dfs = []
    for dname in per_domain_metrics:
        domain_dfs.append(get_scores_dataframe(
            per_domain_metrics[dname], domain_name=dname))
    scores_df = pd.concat(domain_dfs)
    tbl = tabulate(
        scores_df, showindex=False, headers=scores_df.columns,
        tablefmt=args.table_format, floatfmt=f'.{args.precision}f')
    print(tbl)
Example No. 9
def test_error_rate(reference, hypothesis):
    detectionErrorRate = DetectionErrorRate()
    error_rate = detectionErrorRate(reference, hypothesis)
    npt.assert_almost_equal(error_rate, 0.3125, decimal=7)
def test(dataset, medium_template, config_yml, weights_h5, output_dir):

    # load configuration file
    with open(config_yml, 'r') as fp:
        config = yaml.load(fp, Loader=yaml.SafeLoader)

    # this is where model architecture was saved
    architecture_yml = os.path.dirname(
        os.path.dirname(weights_h5)) + '/architecture.yml'

    # -- DATASET --
    db, task, protocol, subset = dataset.split('.')
    database = get_database(db, medium_template=medium_template)
    protocol = database.get_protocol(task, protocol)

    if not hasattr(protocol, subset):
        raise NotImplementedError('unknown subset: {0}'.format(subset))

    file_generator = getattr(protocol, subset)()

    # -- FEATURE EXTRACTION --
    # input sequence duration
    duration = config['feature_extraction']['duration']
    # MFCCs
    feature_extractor = YaafeMFCC(**config['feature_extraction']['mfcc'])
    # normalization
    normalize = config['feature_extraction']['normalize']

    # -- TESTING --
    # overlap ratio between each window
    overlap = config['testing']['overlap']
    step = duration * (1. - overlap)

    # prediction smoothing
    onset = config['testing']['binarize']['onset']
    offset = config['testing']['binarize']['offset']
    binarizer = Binarize(onset=onset, offset=offset)

    sequence_labeling = SequenceLabeling.from_disk(architecture_yml,
                                                   weights_h5)

    aggregation = SequenceLabelingAggregation(sequence_labeling,
                                              feature_extractor,
                                              normalize=normalize,
                                              duration=duration,
                                              step=step)

    collar = 0.500
    error_rate = DetectionErrorRate(collar=collar)
    accuracy = DetectionAccuracy(collar=collar)
    precision = DetectionPrecision(collar=collar)
    recall = DetectionRecall(collar=collar)

    LINE = '{uri} {e:.3f} {a:.3f} {p:.3f} {r:.3f} {f:.3f}\n'

    PATH = '{output_dir}/eval.{dataset}.{subset}.txt'
    path = PATH.format(output_dir=output_dir, dataset=dataset, subset=subset)

    with open(path, 'w') as fp:

        header = '# uri error accuracy precision recall f_measure\n'
        fp.write(header)
        fp.flush()

        for current_file in file_generator:

            uri = current_file['uri']
            wav = current_file['medium']['wav']
            annotated = current_file['annotated']
            annotation = current_file['annotation']

            predictions = aggregation.apply(wav)
            hypothesis = binarizer.apply(predictions, dimension=1)

            e = error_rate(annotation, hypothesis, uem=annotated)
            a = accuracy(annotation, hypothesis, uem=annotated)
            p = precision(annotation, hypothesis, uem=annotated)
            r = recall(annotation, hypothesis, uem=annotated)
            f = f_measure(p, r)

            line = LINE.format(uri=uri, e=e, a=a, p=p, r=r, f=f)
            fp.write(line)
            fp.flush()

            PATH = '{output_dir}/{uri}.json'
            path = PATH.format(output_dir=output_dir, uri=uri)
            dump_to(hypothesis, path)

        # average on whole corpus
        uri = '{dataset}.{subset}'.format(dataset=dataset, subset=subset)
        e = abs(error_rate)
        a = abs(accuracy)
        p = abs(precision)
        r = abs(recall)
        f = f_measure(p, r)
        line = LINE.format(uri=uri, e=e, a=a, p=p, r=r, f=f)
        fp.write(line)
        fp.flush()
Example No. 11
    def get_tune_metric(self):
        return DetectionErrorRate()
Example No. 12
    def get_metric(self) -> Union[DetectionErrorRate, DiarizationErrorRate]:
        if self.only_sad:
            return DetectionErrorRate(collar=0.0)
        else:
            return DiarizationErrorRate(collar=0.0, skip_overlap=False)
Example No. 13
    def validate_epoch(self, epoch, protocol_name, subset='development',
                       validation_data=None):

        # load model for current epoch
        model = self.load_model(epoch).to(self.device)
        model.eval()

        if isinstance(self.feature_extraction_, Precomputed):
            self.feature_extraction_.use_memmap = False

        duration = self.task_.duration
        step = .25 * duration
        sequence_labeling = SequenceLabeling(
            model, self.feature_extraction_, duration=duration,
            step=step, batch_size=self.batch_size,
            source='audio', device=self.device)

        protocol = get_protocol(protocol_name, progress=False,
                                preprocessors=self.preprocessors_)

        metric = DetectionErrorRate()

        predictions = {}

        file_generator = getattr(protocol, subset)()
        for current_file in file_generator:
            uri = get_unique_identifier(current_file)
            scores = sequence_labeling.apply(current_file)

            if model.logsoftmax:
                scores = SlidingWindowFeature(
                    1. - np.exp(scores.data[:, 0]),
                    scores.sliding_window)
            else:
                scores = SlidingWindowFeature(
                    1. - scores.data[:, 0],
                    scores.sliding_window)

            predictions[uri] = scores

        def fun(threshold):

            binarizer = Binarize(onset=threshold,
                                 offset=threshold,
                                 log_scale=False)

            protocol = get_protocol(protocol_name, progress=False,
                                    preprocessors=self.preprocessors_)

            metric = DetectionErrorRate()

            # NOTE -- embarrassingly parallel
            # TODO -- parallelize this
            file_generator = getattr(protocol, subset)()
            for current_file in file_generator:

                uri = get_unique_identifier(current_file)
                hypothesis = binarizer.apply(
                    predictions[uri], dimension=0).to_annotation()
                reference = current_file['annotation']
                uem = get_annotated(current_file)
                _ = metric(reference, hypothesis, uem=uem)

            return abs(metric)

        res = scipy.optimize.minimize_scalar(
            fun, bounds=(0., 1.), method='bounded', options={'maxiter': 10})

        return {
            'speech_activity_detection/error': {'minimize': True,
                                                'value': res.fun},
            'speech_activity_detection/threshold': {'minimize': 'NA',
                                                    'value': res.x}}