Пример #1
0
def test_trim_to_supervisions_mixed_cuts():
    """
    Trim a mixed cut (``cut1`` with ``cut2`` appended) to its supervisions and
    check that every resulting item is a plain ``Cut`` holding exactly one
    supervision, with the expected start, duration and supervision id.
    """
    first_cut = Cut(
        'cut1',
        start=0,
        duration=30,
        channel=0,
        supervisions=[
            SupervisionSegment('sup1', 'rec1', start=1.5, duration=8.5),
            SupervisionSegment('sup2', 'rec1', start=10, duration=5),
            SupervisionSegment('sup3', 'rec1', start=20, duration=8),
        ],
    )
    second_cut = Cut(
        'cut2',
        start=0,
        duration=30,
        channel=0,
        supervisions=[
            SupervisionSegment('sup4', 'rec1', start=0, duration=30),
        ],
    )
    cut_set = CutSet.from_cuts([first_cut.append(second_cut)])
    assert isinstance(cut_set[0], MixedCut)

    cuts = cut_set.trim_to_supervisions()
    assert len(cuts) == 4

    # The tracks did not overlap, so trimming is expected to leave only
    # simple (unmixed) cuts behind.
    for trimmed in cuts:
        assert isinstance(trimmed, Cut)
    for trimmed in cuts:
        assert len(trimmed.supervisions) == 1
    # Each supervision is expected to begin exactly where its cut begins.
    for trimmed in cuts:
        assert trimmed.supervisions[0].start == 0

    # Expected (start, duration, supervision id) of each trimmed cut, in order;
    # the cuts must keep the start/duration of the supervision they came from.
    expected = [
        (1.5, 8.5, 'sup1'),
        (10, 5, 'sup2'),
        (20, 8, 'sup3'),
        (0, 30, 'sup4'),
    ]
    for position, (start, duration, sup_id) in enumerate(expected):
        trimmed = cuts[position]
        assert trimmed.start == start
        assert trimmed.duration == duration
        assert trimmed.supervisions[0].id == sup_id
Пример #2
0
def test_mix_same_recording_channels():
    """
    Two cuts covering channels 0 and 1 of the same recording should be merged
    by ``mix_same_recording_channels`` into a single ``MixedCut`` with one
    track per original cut.
    """
    sources = [
        AudioSource('file', channels=[0], source='irrelevant1.wav'),
        AudioSource('file', channels=[1], source='irrelevant2.wav')
    ]
    recording = Recording(
        'rec',
        sampling_rate=8000,
        num_samples=30 * 8000,
        duration=30,
        sources=sources,
    )
    per_channel_cuts = [
        Cut('cut1', start=0, duration=30, channel=0, recording=recording),
        Cut('cut2', start=0, duration=30, channel=1, recording=recording)
    ]
    cut_set = CutSet.from_cuts(per_channel_cuts)

    mixed = cut_set.mix_same_recording_channels()
    assert len(mixed) == 1

    mixed_cut = mixed[0]
    assert isinstance(mixed_cut, MixedCut)
    assert len(mixed_cut.tracks) == 2
    # Track order must follow the order of the cuts in the input set.
    for track_idx in (0, 1):
        assert mixed_cut.tracks[track_idx].cut == cut_set[track_idx]
Пример #3
0
def _read_audio(cut: Cut,
                suppress_errors: bool = False) -> Optional[torch.Tensor]:
    """
    Load the cut's audio and return element 0 of the loaded array as a tensor.

    The loading runs inside ``suppress_and_warn`` configured for
    ``AudioLoadingError`` and ``DurationMismatchError``; when
    ``suppress_errors`` is ``True`` and such an error is suppressed,
    the function yields ``None`` instead.
    """
    error_guard = suppress_and_warn(AudioLoadingError,
                                    DurationMismatchError,
                                    enabled=suppress_errors)
    with error_guard:
        samples = cut.load_audio()
        return torch.from_numpy(samples[0])
    # Reached only when the context manager swallowed an exception.
    return None
Пример #4
0
def cut2(dummy_features):
    """
    Build the ``cut-2`` test cut: it starts at 180.0 with a duration of 10.0,
    uses the supplied features and carries the single supervision ``sup-3``.
    """
    supervision = SupervisionSegment(
        id='sup-3',
        recording_id='irrelevant',
        start=3.0,
        duration=2.5,
    )
    return Cut(
        id='cut-2',
        start=180.0,
        duration=10.0,
        features=dummy_features,
        supervisions=[supervision],
    )
Пример #5
0
def _read_audio(
        cut: Cut,
        suppress_errors: bool = False,
        recording_field: Optional[str] = None) -> Optional[torch.Tensor]:
    """
    Load single-channel audio for a cut and return it as a tensor.

    With ``recording_field`` left as ``None`` the cut's own audio is loaded;
    otherwise the attribute of that name must be a ``Recording`` and is
    loaded through ``cut.load_custom``. The loaded array must have a first
    dimension of size 1.

    The loading runs inside ``suppress_audio_loading_errors``; when
    ``suppress_errors`` is ``True`` and an error is suppressed, ``None``
    is returned.
    """
    with suppress_audio_loading_errors(enabled=suppress_errors):
        if recording_field is not None:
            custom_value = getattr(cut, recording_field)
            assert isinstance(custom_value, Recording), (
                f"Expected 'getattr(cut, {recording_field})' to yield Recording, got {type(custom_value)}"
            )
            audio = cut.load_custom(recording_field)
        else:
            audio = cut.load_audio()
        num_channels = audio.shape[0]
        assert num_channels == 1, f"Expected single-channel audio in cut:\n{cut}"
        return torch.from_numpy(audio[0])
    # Reached only when the context manager swallowed an exception.
    return None
Пример #6
0
def dummy_cut(id: str = 'irrelevant',
              start: float = 0.0,
              duration: float = 1.0,
              supervisions=None):
    """
    Create a ``Cut`` for tests, backed by ``dummy_features(0)``.

    ``supervisions`` defaults to a fresh empty list when not provided.
    """
    if supervisions is None:
        supervisions = []
    return Cut(id=id,
               start=start,
               duration=duration,
               features=dummy_features(0),
               supervisions=supervisions)
Пример #7
0
def dummy_cut(unique_id: int,
              start: float = 0.0,
              duration: float = 1.0,
              supervisions=None):
    """
    Create a channel-0 ``Cut`` for tests.

    The cut id is ``dummy-cut-`` followed by ``unique_id`` zero-padded to four
    digits, and the features come from ``dummy_features(unique_id)``.
    ``supervisions`` defaults to a fresh empty list when not provided.
    """
    cut_id = f'dummy-cut-{unique_id:04d}'
    if supervisions is None:
        supervisions = []
    return Cut(id=cut_id,
               start=start,
               duration=duration,
               channel=0,
               features=dummy_features(unique_id),
               supervisions=supervisions)
Пример #8
0
def test_trim_to_supervisions_mixed_cuts():
    """
    Trim a mixed cut (``cut1`` with ``cut2`` appended) to its supervisions and
    check that each of the four results is a ``MixedCut`` starting at 0 with a
    single supervision of the expected duration and id.
    """
    first_cut = Cut(
        'cut1',
        start=0,
        duration=30,
        channel=0,
        supervisions=[
            SupervisionSegment('sup1', 'rec1', start=1.5, duration=8.5),
            SupervisionSegment('sup2', 'rec1', start=10, duration=5),
            SupervisionSegment('sup3', 'rec1', start=20, duration=8),
        ],
    )
    second_cut = Cut(
        'cut2',
        start=0,
        duration=30,
        channel=0,
        supervisions=[
            SupervisionSegment('sup4', 'rec1', start=0, duration=30),
        ],
    )
    cut_set = CutSet.from_cuts([first_cut.append(second_cut)])

    cuts = cut_set.trim_to_supervisions()
    assert len(cuts) == 4

    # Properties shared by every trimmed cut.
    for trimmed in cuts:
        assert isinstance(trimmed, MixedCut)
    for trimmed in cuts:
        assert trimmed.start == 0
    for trimmed in cuts:
        assert len(trimmed.supervisions) == 1
    for trimmed in cuts:
        assert trimmed.supervisions[0].start == 0

    # Expected (duration, supervision id) of each trimmed cut, in order.
    expected = [
        (8.5, 'sup1'),
        (5, 'sup2'),
        (8, 'sup3'),
        (30, 'sup4'),
    ]
    for position, (duration, sup_id) in enumerate(expected):
        trimmed = cuts[position]
        assert trimmed.duration == duration
        assert trimmed.supervisions[0].id == sup_id
Пример #9
0
def test_trim_to_unsupervised_segments():
    """
    ``trim_to_unsupervised_segments`` should return one supervision-free cut
    for every stretch of a cut that no supervision covers.
    """
    # Has three uncovered stretches: before sup1, between sup2 and sup3,
    # and after sup3.
    partially_supervised = Cut(
        'cut1',
        start=0,
        duration=30,
        channel=0,
        supervisions=[
            SupervisionSegment('sup1', 'rec1', start=1.5, duration=8.5),
            SupervisionSegment('sup2', 'rec1', start=10, duration=5),
            SupervisionSegment('sup3', 'rec1', start=20, duration=8),
        ],
    )
    # Fully covered by sup4, so it contributes no unsupervised cut.
    fully_supervised = Cut(
        'cut2',
        start=0,
        duration=30,
        channel=0,
        supervisions=[
            SupervisionSegment('sup4', 'rec1', start=0, duration=30),
        ],
    )
    cut_set = CutSet.from_cuts([partially_supervised, fully_supervised])

    unsupervised_cuts = cut_set.trim_to_unsupervised_segments()
    assert len(unsupervised_cuts) == 3

    # Expected (start, duration) of each unsupervised cut, in order.
    expected_spans = [(0, 1.5), (15, 5), (28, 2)]
    for position, (start, duration) in enumerate(expected_spans):
        gap = unsupervised_cuts[position]
        assert gap.start == start
        assert gap.duration == duration
        assert gap.supervisions == []
Пример #10
0
def cut1(dummy_features):
    """
    Build the ``cut-1`` test cut: it starts at 0.0 with a duration of 10.0,
    uses the supplied features and carries supervisions ``sup-1`` and ``sup-2``.
    """
    supervisions = [
        SupervisionSegment(
            id='sup-1',
            recording_id='irrelevant',
            start=0.5,
            duration=6.0,
        ),
        SupervisionSegment(
            id='sup-2',
            recording_id='irrelevant',
            start=7.0,
            duration=2.0,
        ),
    ]
    return Cut(
        id='cut-1',
        start=0.0,
        duration=10.0,
        features=dummy_features,
        supervisions=supervisions,
    )
Пример #11
0
def cut_with_relative_paths():
    """
    Build a cut whose features (``storage_dir`` / ``feats.llc``) and recording
    (``audio.wav``) are both referenced through relative paths.
    """
    features = Features(
        type='fbank',
        num_frames=1000,
        num_features=40,
        sampling_rate=8000,
        storage_type='lilcom_files',
        storage_path='storage_dir',
        storage_key='feats.llc',
        start=0,
        duration=10,
    )
    audio_source = AudioSource('file', [0], 'audio.wav')
    recording = Recording('rec', [audio_source], 8000, 80000, 10.0)
    return Cut('cut', 0, 10, 0, features=features, recording=recording)
Пример #12
0
def overlapping_supervisions_cut():
    """
    Build a 0.5s cut with four supervisions (``s1``..``s4``), each 0.2 long
    and starting 0.1 after the previous one, so that neighbours overlap.
    """
    features = Features(
        recording_id='recording-1',
        channels=0,
        start=0,
        duration=0.5,
        type='fbank',
        num_frames=50,
        num_features=80,
        sampling_rate=16000,
        storage_type='lilcom',
        storage_path='test/fixtures/dummy_feats/storage/e66b6386-aee5-4a5a-8369-fdde1d2b97c7.llc',
    )
    # Start times are spelled out as literals (not computed) to keep the
    # exact float values.
    supervision_starts = [('s1', 0.0), ('s2', 0.1), ('s3', 0.2), ('s4', 0.3)]
    supervisions = [
        SupervisionSegment(id=sup_id,
                           recording_id='recording-1',
                           start=sup_start,
                           duration=0.2)
        for sup_id, sup_start in supervision_starts
    ]
    return Cut(
        id='cut-1',
        start=0.0,
        duration=0.5,
        channel=0,
        features=features,
        supervisions=supervisions,
    )
Пример #13
0
def _read_features(cut: Cut) -> torch.Tensor:
    """Load the cut's features and wrap the resulting array in a torch tensor."""
    feats = cut.load_features()
    return torch.from_numpy(feats)
Пример #14
0
def _read_audio(cut: Cut) -> torch.Tensor:
    """
    Load the cut's audio and return element 0 of the loaded array as a tensor
    (presumably the first channel -- confirm against ``Cut.load_audio``).
    """
    samples = cut.load_audio()
    return torch.from_numpy(samples[0])
Пример #15
0
def validate_cut(c: Cut, read_data: bool = False) -> None:
    """
    Run consistency checks on a cut, failing with ``AssertionError`` at the
    first violated condition.

    A ``MixedCut`` is checked track by track: each track's cut is validated
    recursively and its offset must be non-negative. Any other cut gets basic
    start/duration/sampling-rate checks; cuts that are not ``PaddingCut`` are
    further checked against their features, recording, supervisions and
    custom fields.

    :param c: the cut to validate.
    :param read_data: when ``True``, features and audio are actually loaded and
        their shapes are compared with the cut's metadata; the flag is also
        forwarded to the validators of array-like custom fields.
    """
    # --- MixedCut: validate every track, then stop ---
    if isinstance(c, MixedCut):
        assert len(c.tracks) > 0, (
            f"MonoCut {c.id}: Mixed cut must have at least one track."
        )
        for track_idx, track in enumerate(c.tracks):
            validate_cut(track.cut, read_data=read_data)
            assert track.offset >= 0, (
                f"MonoCut: {c.id}: track {track_idx} has a negative offset."
            )
        return

    # --- Checks shared by MonoCut and PaddingCut ---
    assert c.start >= 0, (
        f"MonoCut {c.id}: start must be 0 or greater (got {c.start})"
    )
    assert c.duration > 0, (
        f"MonoCut {c.id}: duration must be greater than 0 (got {c.duration})"
    )
    assert c.sampling_rate > 0, (
        f"MonoCut {c.id}: sampling_rate must be greater than 0 (got {c.sampling_rate})"
    )
    assert c.has_features or c.has_recording, (
        f"MonoCut {c.id}: must have either Features or Recording attached."
    )

    # Everything below applies to regular cuts only.
    if isinstance(c, PaddingCut):
        return

    # --- Features ---
    if c.has_features:
        validate_features(c.features)
        assert c.channel == c.features.channels
        if read_data:
            # "read_data" is deliberately not passed to "validate_features" so the
            # features are not loaded twice; only the part relevant to the cut
            # is loaded and checked here.
            actual_frames, actual_feat_dim = c.load_features().shape
            assert c.num_frames == actual_frames, (
                f"MonoCut {c.id}: expected num_frames: {c.num_frames}, actual: {actual_frames}"
            )
            assert c.num_features == actual_feat_dim, (
                f"MonoCut {c.id}: expected num_features: {c.num_features}, actual: {actual_feat_dim}"
            )

    # --- Recording ---
    if c.has_recording:
        validate_recording(c.recording)
        assert c.channel in c.recording.channel_ids
        if read_data:
            # "read_data" is deliberately not passed to "validate_recording" so the
            # audio is not loaded twice; only the part relevant to the cut
            # is loaded and checked here.
            audio = c.load_audio()
            assert c.num_samples == audio.shape[1], (
                f"MonoCut {c.id}: expected {c.num_samples} samples, got {audio.shape[1]}"
            )

    # --- Supervisions ---
    for sup in c.supervisions:
        validate_supervision(sup)
        assert sup.recording_id == c.recording_id, (
            f"MonoCut {c.id}: supervision {sup.id} has a mismatched recording_id "
            f"(expected {c.recording_id}, supervision has {sup.recording_id})"
        )
        assert sup.channel == c.channel, (
            f"MonoCut {c.id}: supervision {sup.id} has a mismatched channel "
            f"(expected {c.channel}, supervision has {sup.channel})"
        )

    # --- Custom fields ---
    if c.custom is None:
        return
    assert isinstance(c.custom, dict), (
        f"MonoCut {c.id}: custom field has to be set to a dict or None."
    )
    for field_name, field_value in c.custom.items():
        if isinstance(field_value, Array):
            validate_array(field_value, read_data=read_data)
            continue
        if not isinstance(field_value, TemporalArray):
            continue
        validate_temporal_array(field_value, read_data=read_data)
        if isclose(c.duration, field_value.duration):
            continue
        # A duration mismatch is only reported, it does not fail validation.
        logging.warning(
            f"MonoCut {c.id}: possibly mismatched "
            f"duration between cut ({c.duration}s) and temporal array "
            f"in custom field '{field_name}' (num_frames={field_value.num_frames} * "
            f"frame_shift={field_value.frame_shift} == duration={field_value.duration})."
        )