def test_mixed_cut_fill_supervision_raises_on_two_supervisions():
    """``fill_supervision`` must reject a padded cut holding two supervisions.

    The cut is built with two supervisions and padded to 10 seconds before the
    call; the call is expected to raise ``AssertionError``.
    """
    supervisions = [dummy_supervision(0), dummy_supervision(1)]
    cut = dummy_cut(0, supervisions=supervisions).pad(duration=10)
    with pytest.raises(AssertionError):
        # The return value is irrelevant: the call itself has to fail.
        cut.fill_supervision()
示例#2
0
def test_cut_set_decompose():
    """Decomposing a one-cut CutSet yields recording, supervision and feature sets.

    The cut starts at 5.0 in its recording, and the expected supervision
    boundaries below are the cut-relative starts (0.0 and 6.5) shifted by
    that offset.
    """
    first_sup = dummy_supervision(0, start=0.0)
    second_sup = dummy_supervision(1, start=6.5)
    cut = dummy_cut(
        0, start=5.0, duration=10.0, supervisions=[first_sup, second_sup]
    )
    assert cut.start == 5.0
    assert cut.end == 15.0

    recordings, supervisions, features = CutSet.from_cuts([cut]).decompose()

    # Recordings: exactly one, carrying the dummy recording id.
    assert isinstance(recordings, RecordingSet)
    assert len(recordings) == 1
    assert recordings[0].id == "dummy-recording-0000"

    # Supervisions: both present, with the expected ids and boundaries.
    assert isinstance(supervisions, SupervisionSet)
    assert len(supervisions) == 2
    expected = [
        ("dummy-segment-0000", 5.0, 6.0),
        ("dummy-segment-0001", 11.5, 12.5),
    ]
    for index, (sup_id, sup_start, sup_end) in enumerate(expected):
        supervision = supervisions[index]
        assert supervision.id == sup_id
        assert supervision.start == sup_start
        assert supervision.end == sup_end

    # Features: exactly one entry.
    assert isinstance(features, FeatureSet)
    assert len(features) == 1
def cutset():
    """Return a CutSet with three cuts, labelled mono, padding and mixed below."""
    # MonoCut
    mono = dummy_cut(0, supervisions=[dummy_supervision(0)])
    # PaddingCut
    padding = PaddingCut(
        'pad',
        duration=1.0,
        sampling_rate=16000,
        feat_value=-100,
        num_frames=100,
        frame_shift=0.01,
        num_features=80,
        num_samples=16000,
    )
    # MixedCut: two dummy cuts mixed with a 0.5 offset at snr=10.
    base = dummy_cut(0, supervisions=[dummy_supervision(0)])
    other = dummy_cut(1, supervisions=[dummy_supervision(1)])
    mixed = base.mix(other, offset_other_by=0.5, snr=10)
    return CutSet.from_cuts([mono, padding, mixed])
示例#4
0
def test_token_collater(add_bos, add_eos):
    """Round-trip three sentences through ``TokenCollater``.

    Checks the tensor types, the padded batch shape, the per-sentence token
    counts (sentence length plus one for each enabled BOS/EOS flag), and that
    ``inverse`` reconstructs the original texts.
    """
    sentences = [
        "Testing the first sentence.",
        "Let's add some more punctuation, shall we?",
        "How about number 42!",
    ]
    cut_list = [
        dummy_cut(i, i, supervisions=[dummy_supervision(i, i, text=text)])
        for i, text in enumerate(sentences)
    ]
    cuts = CutSet.from_cuts(cut_list)

    collater = TokenCollater(cuts, add_bos=add_bos, add_eos=add_eos)
    batch, lens = collater(cuts)

    assert isinstance(batch, torch.LongTensor)
    assert isinstance(lens, torch.IntTensor)

    # Each enabled special symbol adds one token to every sentence.
    num_special = int(add_bos) + int(add_eos)
    expected_lens = [len(text) + num_special for text in sentences]
    assert batch.shape == (len(sentences), max(expected_lens))
    assert torch.all(lens == torch.IntTensor(expected_lens))

    assert collater.inverse(batch, lens) == sentences
示例#5
0
def dummy_cut_with_supervisions():
    """Return a dummy cut (id 0) with six supervisions of durations 1 through 6."""
    supervisions = []
    for i in range(1, 7):
        # The supervision's id and its duration are both taken from ``i``.
        supervisions.append(dummy_supervision(unique_id=i, duration=i))
    return dummy_cut(unique_id=0, supervisions=supervisions)
def test_mixed_cut_fill_supervision_expand():
    """``fill_supervision`` stretches the supervision over a padded cut.

    The padded input cut must keep its original supervision span.
    """
    padded = dummy_cut(0, supervisions=[dummy_supervision(0)]).pad(duration=7.51)
    filled = padded.fill_supervision()

    # The input cut is left untouched.
    source_sup = padded.supervisions[0]
    assert source_sup.start == 0
    assert source_sup.duration == 1

    # The returned cut has a supervision spanning the whole padded duration.
    filled_sup = filled.supervisions[0]
    assert filled_sup.start == 0
    assert filled_sup.duration == 7.51
def test_mono_cut_fill_supervision_shrink():
    """With ``shrink_ok=True`` the supervision is shortened to the cut's duration.

    The cut's duration is set to 0.5, below the supervision duration of 1
    asserted on the input cut.
    """
    source = dummy_cut(0, supervisions=[dummy_supervision(0)])
    source.duration = 0.5
    shrunk = source.fill_supervision(shrink_ok=True)

    # The input cut keeps its original supervision span.
    source_sup = source.supervisions[0]
    assert source_sup.start == 0
    assert source_sup.duration == 1

    # The returned cut's supervision matches the shortened cut.
    shrunk_sup = shrunk.supervisions[0]
    assert shrunk_sup.start == 0
    assert shrunk_sup.duration == 0.5
示例#8
0
def test_cut_set_decompose_output_dir():
    """``decompose(output_dir=...)`` writes manifests that match what it returns.

    Each returned set is compared item by item with the manifest loaded back
    from the corresponding ``*.jsonl.gz`` file in the output directory.
    """
    supervisions = [
        dummy_supervision(0, start=0.0),
        dummy_supervision(1, start=6.5),
    ]
    cut = dummy_cut(0, start=5.0, duration=10.0, supervisions=supervisions)
    assert cut.start == 5.0
    assert cut.end == 15.0
    cut_set = CutSet.from_cuts([cut])

    with TemporaryDirectory() as tmp:
        out_dir = Path(tmp)
        recordings, sups, features = cut_set.decompose(output_dir=out_dir)
        written = (
            (recordings, "recordings.jsonl.gz"),
            (sups, "supervisions.jsonl.gz"),
            (features, "features.jsonl.gz"),
        )
        for manifest, filename in written:
            assert list(manifest) == list(load_manifest(out_dir / filename))
def test_mixed_cut_fill_supervision_shrink():
    """With ``shrink_ok=True`` a truncated mix gets its supervision shortened.

    Two fully overlapping dummy cuts are mixed and truncated to 0.5 seconds
    before the call.
    """
    base = dummy_cut(0, supervisions=[dummy_supervision(0)])
    # cuts are 100% overlapping
    mixed = base.mix(dummy_cut(1))
    truncated = mixed.truncate(duration=0.5)
    shrunk = truncated.fill_supervision(shrink_ok=True)

    # The input cut keeps its original supervision span.
    source_sup = truncated.supervisions[0]
    assert source_sup.start == 0
    assert source_sup.duration == 1

    # The returned cut's supervision matches the truncated duration.
    shrunk_sup = shrunk.supervisions[0]
    assert shrunk_sup.start == 0
    assert shrunk_sup.duration == 0.5
import pytest

from lhotse import CutSet
from lhotse.cut import PaddingCut
from lhotse.testing.dummies import dummy_cut, dummy_supervision

# Reusable parametrize marker: a decorated test receives a ``cut`` argument and
# runs once for each of the three cuts below (labelled mono, padding and mixed).
parametrize_on_cut_types = pytest.mark.parametrize(
    'cut', [
        # MonoCut
        dummy_cut(0, supervisions=[dummy_supervision(0)]),
        # PaddingCut
        PaddingCut('pad', duration=1.0, sampling_rate=16000, feat_value=-100,
                   num_frames=100, frame_shift=0.01, num_features=80, num_samples=16000),
        # MixedCut
        dummy_cut(0, supervisions=[dummy_supervision(0)]).mix(
            dummy_cut(1, supervisions=[dummy_supervision(1)]),
            offset_other_by=0.5,
            snr=10
        )
    ]
)


@parametrize_on_cut_types
def test_drop_features(cut):
    """``drop_features`` returns a feature-less cut and leaves the input intact."""
    # Precondition: the parametrized cut starts out with features.
    assert cut.has_features

    without_features = cut.drop_features()

    # The input still reports features; only the returned cut lost them.
    assert cut.has_features
    assert not without_features.has_features

def test_mono_cut_fill_supervision_identity():
    """``fill_supervision`` on an unmodified single-supervision dummy cut returns an equal cut."""
    original = dummy_cut(0, supervisions=[dummy_supervision(0)])
    filled = original.fill_supervision()
    assert original == filled
def test_mixed_cut_fill_supervision_identity():
    """``fill_supervision`` on a mix of two fully overlapping cuts returns an equal cut."""
    base = dummy_cut(0, supervisions=[dummy_supervision(0)])
    # cuts are 100% overlapping
    mixed = base.mix(dummy_cut(1))
    filled = mixed.fill_supervision()
    assert mixed == filled
def test_mono_cut_fill_supervision_raises_on_two_supervisions():
    """``fill_supervision`` must raise ``AssertionError`` for a cut with two supervisions."""
    supervisions = [dummy_supervision(0), dummy_supervision(1)]
    cut = dummy_cut(0, supervisions=supervisions)
    with pytest.raises(AssertionError):
        # The return value is irrelevant: the call itself has to fail.
        cut.fill_supervision()
def test_mono_cut_fill_supervision_shrink_raises_default():
    """Without ``shrink_ok``, ``fill_supervision`` raises ``ValueError`` on a shortened cut.

    The cut's duration is set to 0.5 before the call, which is made with
    default arguments.
    """
    cut = dummy_cut(0, supervisions=[dummy_supervision(0)])
    cut.duration = 0.5
    with pytest.raises(ValueError):
        # The return value is irrelevant: the call itself has to fail.
        cut.fill_supervision()
def test_mixed_cut_fill_supervision_shrink_raises_default():
    """Without ``shrink_ok``, ``fill_supervision`` raises ``ValueError`` on a truncated mix.

    Two fully overlapping dummy cuts are mixed and truncated to 0.5 seconds
    before the call, which is made with default arguments.
    """
    cut = dummy_cut(0, supervisions=[dummy_supervision(0)])
    # cuts are 100% overlapping
    cut = cut.mix(dummy_cut(1)).truncate(duration=0.5)
    with pytest.raises(ValueError):
        # The return value is irrelevant: the call itself has to fail.
        cut.fill_supervision()