Пример #1
0
    def test_make_cuts_from_recordings_features_supervisions(
            self, dummy_recording_set, dummy_feature_set,
            dummy_supervision_set):
        """Check the first cut built from recording, supervision and feature manifests."""
        all_cuts = CutSet.from_manifests(
            recordings=dummy_recording_set,
            supervisions=dummy_supervision_set,
            features=dummy_feature_set,
        )
        first_cut = all_cuts[0]

        # Time span and channel of the cut itself.
        assert first_cut.start == 0
        assert first_cut.duration == 10.0
        assert first_cut.end == 10.0
        assert first_cut.channel == 0

        # Exactly one supervision segment is expected to be attached.
        assert len(first_cut.supervisions) == 1
        segment = first_cut.supervisions[0]
        assert segment.id == 'sup1'
        assert segment.recording_id == 'rec1'
        assert segment.start == 3.0
        assert segment.end == 7.0
        assert segment.channel == 0
        assert segment.text == 'dummy text'

        # The recording must be the very entry stored in the fixture.
        assert first_cut.has_recording
        expected_recording = dummy_recording_set.recordings['rec1']
        assert first_cut.recording == expected_recording
        assert first_cut.sampling_rate == 16000
        assert first_cut.recording_id == 'rec1'
        assert first_cut.num_samples == 160000

        # The features must be the first entry stored in the fixture.
        assert first_cut.has_features
        expected_features = dummy_feature_set.features[0]
        assert first_cut.features == expected_features
        assert first_cut.frame_shift == 0.01
        assert first_cut.num_frames == 1000
        assert first_cut.num_features == 23
        assert first_cut.features_type == 'fbank'
Пример #2
0
def random_mixed(supervision_manifest: Pathlike, feature_manifest: Pathlike,
                 output_cut_manifest: Pathlike, snr_range: Tuple[float, float],
                 offset_range: Tuple[float, float]):
    """
    Write to OUTPUT_CUT_MANIFEST a CutSet of mixed cuts built from the supervision regions in
    SUPERVISION_MANIFEST and the features in FEATURE_MANIFEST. A trivial CutSet is created first, then split
    into two shuffled parts, and the cuts of both parts are mixed pairwise.
    For every pair, the SNR is drawn uniformly from SNR_RANGE, and the offset of the second cut is the
    duration of the first cut multiplied by a factor drawn uniformly from OFFSET_RANGE.
    """
    supervisions = SupervisionSet.from_json(supervision_manifest)
    features = FeatureSet.from_json(feature_manifest)

    all_cuts = CutSet.from_manifests(supervisions=supervisions,
                                     features=features)
    left_cuts, right_cuts = all_cuts.split(num_splits=2, shuffle=True)

    # One SNR and one relative offset per cut in the left part; the SNRs are
    # drawn before the offsets.
    num_draws = len(left_cuts)
    snrs = np.random.uniform(*snr_range, size=num_draws).tolist()
    relative_offsets = np.random.uniform(*offset_range,
                                         size=num_draws).tolist()

    def _mixed_pairs():
        # zip() stops at the shortest input, so unpaired cuts are left out.
        pairs = zip(left_cuts, right_cuts, snrs, relative_offsets)
        for left, right, snr, rel_offset in pairs:
            yield left.mix(right,
                           offset_other_by=left.duration * rel_offset,
                           snr=snr)

    CutSet.from_cuts(_mixed_pairs()).to_json(output_cut_manifest)
Пример #3
0
    def test_make_cuts_from_recordings_supervisions(self, dummy_recording_set,
                                                    dummy_supervision_set):
        """Check the first cut built from recordings and supervisions only (no features)."""
        all_cuts = CutSet.from_manifests(
            recordings=dummy_recording_set,
            supervisions=dummy_supervision_set,
        )
        first_cut = all_cuts[0]

        # Time span and channel of the cut itself.
        assert first_cut.start == 0
        assert first_cut.duration == 10.0
        assert first_cut.end == 10.0
        assert first_cut.channel == 0

        # Exactly one supervision segment is expected to be attached.
        assert len(first_cut.supervisions) == 1
        segment = first_cut.supervisions[0]
        assert segment.id == "sup1"
        assert segment.recording_id == "rec1"
        assert segment.start == 3.0
        assert segment.end == 7.0
        assert segment.channel == 0
        assert segment.text == "dummy text"

        # The recording must be the very entry stored in the fixture.
        assert first_cut.has_recording
        expected_recording = dummy_recording_set.recordings["rec1"]
        assert first_cut.recording == expected_recording
        assert first_cut.sampling_rate == 16000
        assert first_cut.recording_id == "rec1"
        assert first_cut.num_samples == 160000

        # No feature manifest was given, so every feature-related field is empty.
        assert not first_cut.has_features
        assert first_cut.features is None
        assert first_cut.frame_shift is None
        assert first_cut.num_frames is None
        assert first_cut.num_features is None
        assert first_cut.features_type is None
Пример #4
0
 def __init__(
     self,
     cuts: CutSet,
     uem: Optional[SupervisionSet] = None,
     min_speaker_dim: Optional[int] = None,
     global_speaker_ids: bool = False,
 ) -> None:
     """
     Validate ``cuts`` and store them, optionally restricted by ``uem``.

     :param cuts: the cuts to use; passed through ``validate`` first.
     :param uem: optional supervision set; when it is falsy, ``cuts`` is stored
         unchanged. Otherwise ``self.cuts`` is rebuilt from the recordings of
         ``cuts`` and the supervisions selected via ``uem`` (see below).
     :param min_speaker_dim: stored as-is on the instance.
     :param global_speaker_ids: when true, ``self.speakers`` maps each entry of
         ``self.cuts.speakers`` to its enumeration index; otherwise it is None.
     """
     super().__init__()
     validate(cuts)
     if uem:
         # Only cuts that actually carry a recording contribute one.
         recordings = RecordingSet({
             cut.recording.id: cut.recording
             for cut in cuts if cut.has_recording
         })
         # Index the UEM segments per cut, built over the same recordings.
         uem_cuts = CutSet.from_manifests(
             recordings=recordings,
             supervisions=uem,
         )
         uem_index = uem_cuts.index_supervisions()
         kept_segments = []
         for cut_id, sup_tree in cuts.index_supervisions().items():
             if cut_id in uem_index:
                 # Collect the supervision intervals overlapping any UEM
                 # interval; the set collapses an interval that is hit by
                 # several UEM intervals into a single entry.
                 kept_segments.extend({
                     hit.data.trim(hit.end, start=hit.begin)
                     for uem_hit in uem_index[cut_id]
                     for hit in sup_tree.overlap(begin=uem_hit.begin,
                                                 end=uem_hit.end)
                 })
             else:
                 # No UEM entry for this cut: keep all of its supervisions.
                 kept_segments.extend(hit.data for hit in sup_tree)
         self.cuts = CutSet.from_manifests(
             recordings=recordings,
             supervisions=SupervisionSet.from_segments(kept_segments),
         )
     else:
         self.cuts = cuts
     if global_speaker_ids:
         self.speakers = {
             speaker: index
             for index, speaker in enumerate(self.cuts.speakers)
         }
     else:
         self.speakers = None
     self.min_speaker_dim = min_speaker_dim
Пример #5
0
def test_known_issue_with_overlap():
    """Check trim_to_supervisions(keep_overlapping=False) on two overlapping supervisions."""
    recording = dummy_recording(0)
    recording_set = RecordingSet.from_recordings([recording])

    # Two segments that share the recording id of the single recording above:
    # the first is 1.0 s long and starts at 0.0; the second is 0.5 s long,
    # starts at 0.2 and therefore lies entirely within the first.
    outer_segment = SupervisionSegment(
        id="utt1",
        recording_id=recording.id,
        start=0.0,
        duration=1.0,
        channel=0,
        text="Hello",
    )
    inner_segment = SupervisionSegment(
        id="utt2",
        recording_id=recording.id,
        start=0.2,
        duration=0.5,
        channel=0,
        text="World",
    )
    supervision_set = SupervisionSet.from_segments(
        [outer_segment, inner_segment])

    cuts = CutSet.from_manifests(recordings=recording_set,
                                 supervisions=supervision_set)
    assert len(cuts) == 1

    trimmed_cuts = cuts.trim_to_supervisions(keep_overlapping=False)
    assert len(trimmed_cuts) == 2

    # First trimmed cut: spans the outer segment and keeps only that one.
    outer_cut = trimmed_cuts[0]
    assert outer_cut.start == 0
    assert outer_cut.duration == 1
    assert len(outer_cut.supervisions) == 1
    kept = outer_cut.supervisions[0]
    assert kept.start == 0
    assert kept.duration == 1
    assert kept.text == "Hello"

    # Second trimmed cut: spans the inner segment; the supervision start is
    # expected to be 0 here while the cut itself starts at 0.2.
    inner_cut = trimmed_cuts[1]
    assert inner_cut.start == 0.2
    assert inner_cut.duration == 0.5
    assert len(inner_cut.supervisions) == 1
    kept = inner_cut.supervisions[0]
    assert kept.start == 0
    assert kept.duration == 0.5
    assert kept.text == "World"
Пример #6
0
    def test_make_cuts_from_recordings_features_supervisions(
        self,
        dummy_recording_set_lazy,
        dummy_feature_set_lazy,
        dummy_supervision_set_lazy,
    ):
        """Check the first cut built with lazy=True and written to a temporary manifest."""
        with NamedTemporaryFile(suffix=".jsonl.gz") as tmp_manifest:
            lazy_cuts = CutSet.from_manifests(
                recordings=dummy_recording_set_lazy,
                supervisions=dummy_supervision_set_lazy,
                features=dummy_feature_set_lazy,
                lazy=True,
                output_path=tmp_manifest.name,
            )
            tmp_manifest.flush()
            first_cut = lazy_cuts[0]

            # Time span and channel of the cut itself.
            assert first_cut.start == 0
            assert first_cut.duration == 10.0
            assert first_cut.end == 10.0
            assert first_cut.channel == 0

            # Two supervision segments are expected, in this order.
            assert len(first_cut.supervisions) == 2

            first_segment = first_cut.supervisions[0]
            assert first_segment.id == "sup1"
            assert first_segment.recording_id == "rec1"
            assert first_segment.start == 3.0
            assert first_segment.end == 7.0
            assert first_segment.channel == 0
            assert first_segment.text == "dummy text"

            second_segment = first_cut.supervisions[1]
            assert second_segment.id == "sup2"
            assert second_segment.recording_id == "rec1"
            assert second_segment.start == 7.0
            assert second_segment.end == 9.0
            assert second_segment.channel == 0
            assert second_segment.text == "dummy text"

            # Recording-derived properties.
            assert first_cut.has_recording
            assert first_cut.sampling_rate == 16000
            assert first_cut.recording_id == "rec1"
            assert first_cut.num_samples == 160000

            # Feature-derived properties.
            assert first_cut.has_features
            assert first_cut.frame_shift == 0.01
            assert first_cut.num_frames == 1000
            assert first_cut.num_features == 23
            assert first_cut.features_type == "fbank"
Пример #7
0
def simple(
        output_cut_manifest: Pathlike,
        recording_manifest: Optional[Pathlike],
        feature_manifest: Optional[Pathlike],
        supervision_manifest: Optional[Pathlike],
):
    """
    Build a CutSet and store it in OUTPUT_CUT_MANIFEST. Any combination of recording, feature and supervision
    manifests may be supplied; the ones that are given end up in the created cuts.
    At least one of RECORDING_MANIFEST and FEATURE_MANIFEST has to be provided.
    When SUPERVISION_MANIFEST is provided, the cuts time span will correspond to that of the supervision segments.
    Otherwise, that time span corresponds to the one found in features, if available, otherwise recordings.
    """

    def _load_if_given(path):
        # A manifest that was not supplied stays None and is passed on as such.
        return None if path is None else load_manifest(path)

    # Manifests are loaded in this order: supervisions, features, recordings.
    supervision_set = _load_if_given(supervision_manifest)
    feature_set = _load_if_given(feature_manifest)
    recording_set = _load_if_given(recording_manifest)

    cut_set = CutSet.from_manifests(
        recordings=recording_set,
        supervisions=supervision_set,
        features=feature_set,
    )
    cut_set.to_file(output_cut_manifest)
Пример #8
0
def test_make_cuts_from_features(dummy_feature_set):
    """Check the first cut built from a feature manifest alone."""
    first_cut = CutSet.from_manifests(features=dummy_feature_set)[0]

    # Time span and channel of the cut itself.
    assert first_cut.start == 0
    assert first_cut.duration == 10.0
    assert first_cut.end == 10.0
    assert first_cut.channel == 0

    # No supervision manifest was given.
    assert len(first_cut.supervisions) == 0

    # No recording manifest was given; the sampling rate and recording id are
    # still expected to be set, while the number of samples is not.
    assert not first_cut.has_recording
    assert first_cut.recording is None
    assert first_cut.sampling_rate == 16000
    assert first_cut.recording_id == 'rec1'
    assert first_cut.num_samples is None

    # The features must be the first entry stored in the fixture.
    assert first_cut.has_features
    expected_features = dummy_feature_set.features[0]
    assert first_cut.features == expected_features
    assert first_cut.frame_shift == 0.01
    assert first_cut.num_frames == 1000
    assert first_cut.num_features == 23
    assert first_cut.features_type == 'fbank'
Пример #9
0
def test_make_cuts_from_features_with_random_ids(dummy_feature_set):
    """With random_ids=True, no cut id may equal the '<recording_id>-<index>-<channel>' pattern."""
    random_id_cuts = CutSet.from_manifests(
        features=dummy_feature_set,
        random_ids=True,
    )
    for position, cut in enumerate(random_id_cuts):
        patterned_id = f'{cut.recording_id}-{position}-{cut.channel}'
        assert cut.id != patterned_id
Пример #10
0
def test_make_cuts_from_recordings_with_deterministic_ids(dummy_recording_set):
    """With random_ids=False, every cut id must equal '<recording_id>-<index>-<channel>'."""
    deterministic_cuts = CutSet.from_manifests(
        recordings=dummy_recording_set,
        random_ids=False,
    )
    for position, cut in enumerate(deterministic_cuts):
        patterned_id = f'{cut.recording_id}-{position}-{cut.channel}'
        assert cut.id == patterned_id