Example #1
from pyannote.core import Segment, Timeline


def write_test_file(data_dir, output_file, trial_length):
    annotations, max_length, speakers = read_annotations(data_dir)
    # create artificial non-overlapping segments, each of trial_length size
    trial_segments = Timeline()
    for i in range(0, int(max_length) // trial_length):
        trial_segments.add(Segment(start=i*trial_length, end=(i+1)*trial_length))

    with open(output_file, 'w') as f:
        for label in speakers.keys():
            for annotation in annotations:
                # make sure trial segments do not extend beyond the total length of the speech data
                support = annotation.get_timeline().extent()
                # use a slightly smaller extent so that a 3-second embedding can still be computed
                adjusted_trial_segments = trial_segments.crop(Segment(start=support.start, end=support.end - 3.),
                                                              mode='loose')
                uri = annotation.uri
                cur_timeline = annotation.label_timeline(label, copy=False)
                for trial_segment in adjusted_trial_segments:
                    cropped_speaker = cur_timeline.crop(trial_segment, mode='intersection')
                    if not cropped_speaker:
                        f.write('{0} {1} {2:0>7.2f} {3:0>7.2f} nontarget - -\n'.format(
                            label,
                            uri,
                            trial_segment.start,
                            trial_segment.end))
                    else:
                        f.write('{0} {1} {2:0>7.2f} {3:0>7.2f} target {4:0>7.2f} {5:0>7.2f}\n'.format(
                            label,
                            uri,
                            trial_segment.start,
                            trial_segment.end,
                            cropped_speaker[0].start,
                            cropped_speaker[0].duration))
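The two crop modes used above treat boundary-straddling segments differently; a minimal sketch of the contrast (toy segments, assuming only pyannote.core):

from pyannote.core import Segment, Timeline

timeline = Timeline([Segment(0, 4), Segment(3, 6)])
support = Segment(2, 5)

# 'loose' keeps every segment that intersects the support, untouched
assert timeline.crop(support, mode='loose') == Timeline([Segment(0, 4), Segment(3, 6)])

# 'intersection' (the default) clips each intersecting segment to the support
assert timeline.crop(support, mode='intersection') == Timeline([Segment(2, 4), Segment(3, 5)])

# 'strict' keeps only segments fully contained in the support
assert timeline.crop(support, mode='strict') == Timeline([])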
Example #2
from pyannote.core import Segment, Timeline


def test_crop_mapping():
    timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)])
    cropped, mapping = timeline.crop(Segment(1, 2), returns_mapping=True)

    expected_cropped = Timeline([Segment(1, 2)])
    assert cropped == expected_cropped

    expected_mapping = {Segment(1, 2): [Segment(0, 2), Segment(1, 2)]}
    assert mapping == expected_mapping
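As a usage note, the mapping returned alongside the cropped timeline lets callers trace each piece back to the original segment(s) it was cut from; a small sketch on the same toy data:

from pyannote.core import Segment, Timeline

timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)])
cropped, mapping = timeline.crop(Segment(1, 2), returns_mapping=True)

for segment in cropped:
    # mapping[segment] is the list of original segments this piece came from
    print(segment, '<-', mapping[segment])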
Example #3
from pyannote.core import Segment, Timeline


def test_union():
    first_timeline = Timeline([Segment(0, 1), Segment(2, 3), Segment(4, 5)])
    second_timeline = Timeline([Segment(1.5, 4.5)])

    assert first_timeline.union(second_timeline) == Timeline(
        [Segment(0, 1),
         Segment(1.5, 4.5),
         Segment(2, 3),
         Segment(4, 5)])

    assert second_timeline.crop(first_timeline) == Timeline(
        [Segment(2, 3), Segment(4, 4.5)])

    assert list(first_timeline.co_iter(second_timeline)) == [
        (Segment(2, 3), Segment(1.5, 4.5)), (Segment(4, 5), Segment(1.5, 4.5))
    ]
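A short follow-on sketch (pyannote.core only; the overlap arithmetic is mine) showing co_iter put to work to measure how much two timelines overlap:

from pyannote.core import Segment, Timeline

first_timeline = Timeline([Segment(0, 1), Segment(2, 3), Segment(4, 5)])
second_timeline = Timeline([Segment(1.5, 4.5)])

# co_iter yields every pair of intersecting segments, so summing the
# pairwise intersections gives the total overlap duration
overlap = sum((s & t).duration for s, t in first_timeline.co_iter(second_timeline))
assert overlap == 1.5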
Example #4
    def remove_excluded(self):
        if len(self.excluded) == 0:
            return

        import pandas as pd
        from pyannote.core import Segment, Timeline

        segments = []
        for recording, _segments in self.segments.groupby(
                "recording_filename"):
            sampled = Timeline(segments=[
                Segment(segment_onset, segment_offset)
                for segment_onset, segment_offset in _segments[
                    ["segment_onset", "segment_offset"]].values
            ])

            excl_segments = self.excluded.loc[
                self.excluded["recording_filename"] == recording]
            excl = Timeline(segments=[
                Segment(segment_onset, segment_offset)
                for segment_onset, segment_offset in excl_segments[
                    ["segment_onset", "segment_offset"]].values
            ])

            # sampled = sampled.extrude(excl)  # Timeline.extrude not released yet
            extent_tl = Timeline([sampled.extent()], uri=sampled.uri)
            truncating_support = excl.gaps(support=extent_tl)
            sampled = sampled.crop(truncating_support, mode="intersection")

            segments.append(
                pd.DataFrame(
                    [[recording, s.start, s.end] for s in sampled],
                    columns=[
                        "recording_filename", "segment_onset", "segment_offset"
                    ],
                ))

        self.segments = pd.concat(segments)
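Later pyannote.core releases ship the Timeline.extrude mentioned in the comment above, collapsing the gaps/crop workaround into one call; a hedged sketch, assuming a release that includes it:

from pyannote.core import Segment, Timeline

sampled = Timeline([Segment(0, 10)])
excl = Timeline([Segment(2, 4), Segment(6, 7)])

# extrude removes every part of `sampled` that intersects `excl`
assert sampled.extrude(excl) == Timeline([Segment(0, 2), Segment(4, 6), Segment(7, 10)])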
Example #5
    def __call__(self, reference, hypothesis):

        if isinstance(reference, Annotation):
            reference = reference.get_timeline()

        if isinstance(hypothesis, Annotation):
            hypothesis = hypothesis.get_timeline()

        # over-segmentation
        over = Timeline(uri=reference.uri)
        prev_r = reference[0]
        intersection = []
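        # group intersections by reference segment: every piece except the
        # longest one means the hypothesis over-segments that reference segment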
        for r, h in reference.co_iter(hypothesis):

            if r != prev_r:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    over.add(segment)
                intersection = []
                prev_r = r

            segment = r & h
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            over.add(segment)

        # under-segmentation
        under = Timeline(uri=reference.uri)
        prev_h = hypothesis[0]
        intersection = []
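        # symmetrically, group by hypothesis segment: every piece except the
        # longest one marks a region that the hypothesis under-segments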
        for h, r in hypothesis.co_iter(reference):

            if h != prev_h:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    under.add(segment)
                intersection = []
                prev_h = h

            segment = h & r
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            under.add(segment)

        # extent
        extent = reference.extent()

        # correct (neither under- nor over-segmented)
        correct = under.union(over).gaps(support=extent)

        # frontier error (both under- and over-segmented)
        frontier = under.crop(over)

        # under-segmented
        not_over = over.gaps(support=extent)
        only_under = under.crop(not_over)

        # over-segmented
        not_under = under.gaps(support=extent)
        only_over = over.crop(not_under)

        status = Annotation(uri=reference.uri)
        # for segment in correct:
        #     status[segment, '_'] = 'correct'
        for segment in frontier:
            status[segment, '_'] = 'shift'
        for segment in only_over:
            status[segment, '_'] = 'over-segmentation'
        for segment in only_under:
            status[segment, '_'] = 'under-segmentation'

        return status.support()
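The closing gaps/crop bookkeeping partitions the extent into mutually exclusive statuses; a standalone sketch of just that step, with toy over/under timelines standing in for the ones computed above:

from pyannote.core import Segment, Timeline

extent = Segment(0, 10)
over = Timeline([Segment(1, 3), Segment(5, 6)])
under = Timeline([Segment(2, 4), Segment(8, 9)])

# both over- and under-segmented (labelled 'shift' above)
assert under.crop(over) == Timeline([Segment(2, 3)])

# under-segmented only: under, restricted to wherever over is absent
assert under.crop(over.gaps(support=extent)) == Timeline([Segment(3, 4), Segment(8, 9)])

# over-segmented only: over, restricted to wherever under is absent
assert over.crop(under.gaps(support=extent)) == Timeline([Segment(1, 2), Segment(5, 6)])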
Example #6
    def __call__(self, reference, hypothesis):

        if isinstance(reference, Annotation):
            reference = reference.get_timeline()

        if isinstance(hypothesis, Annotation):
            hypothesis = hypothesis.get_timeline()

        # over-segmentation
        over = Timeline(uri=reference.uri)
        prev_r = reference[0]
        intersection = []
        for r, h in reference.co_iter(hypothesis):

            if r != prev_r:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    over.add(segment)
                intersection = []
                prev_r = r

            segment = r & h
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            over.add(segment)

        # under-segmentation
        under = Timeline(uri=reference.uri)
        prev_h = hypothesis[0]
        intersection = []
        for h, r in hypothesis.co_iter(reference):

            if h != prev_h:
                intersection = sorted(intersection)
                for _, segment in intersection[:-1]:
                    under.add(segment)
                intersection = []
                prev_h = h

            segment = h & r
            intersection.append((segment.duration, segment))

        intersection = sorted(intersection)
        for _, segment in intersection[:-1]:
            under.add(segment)

        # extent
        extent = reference.extent()

        # correct (neither under- nor over-segmented)
        correct = under.union(over).gaps(support=extent)

        # frontier error (both under- and over-segmented)
        frontier = under.crop(over)

        # under-segmented
        not_over = over.gaps(support=extent)
        only_under = under.crop(not_over)

        # over-segmented
        not_under = under.gaps(support=extent)
        only_over = over.crop(not_under)

        status = Annotation(uri=reference.uri)
        for segment in correct:
            status[segment, '_'] = 'correct'
        for segment in frontier:
            status[segment, '_'] = 'frontier'
        for segment in only_over:
            status[segment, '_'] = 'over'
        for segment in only_under:
            status[segment, '_'] = 'under'

        return status.support()