示例#1
0
def test_combine(manifest_type):
    """Check that ``combine`` of three adjacent manifests equals one manifest built with begin_id=0, end_id=200."""
    reference = DummyManifest(manifest_type, begin_id=0, end_id=200)
    # Consecutive (begin_id, end_id) pairs; each piece starts where the previous one ended.
    bounds = [(0, 68), (68, 136), (136, 200)]
    pieces = [
        DummyManifest(manifest_type, begin_id=lo, end_id=hi)
        for lo, hi in bounds
    ]
    merged = combine(*pieces)
    assert merged == reference
示例#2
0
def test_split_randomize(manifest_type):
    """Check that a randomized two-way split keeps the item count but changes the item order."""
    source = DummyManifest(manifest_type, begin_id=0, end_id=100)
    halves = source.split(num_splits=2, randomize=True)
    assert len(halves) == 2
    # Compare plain lists rather than *Set objects: the *Set classes might
    # re-order their items internally after concatenation (e.g. dict storage
    # or post-init sorting), which would hide the shuffling.
    rejoined = [*halves[0], *halves[1]]
    assert len(rejoined) == len(source)
    assert rejoined != list(source)
示例#3
0
def test_split_even(manifest_type):
    """Check that splitting a manifest built with ids 0..100 in two gives the 0..50 and 50..100 parts."""
    whole = DummyManifest(manifest_type, begin_id=0, end_id=100)
    parts = split(whole, num_splits=2)
    assert len(parts) == 2
    # Expected (begin_id, end_id) of each part, in order.
    expected_bounds = [(0, 50), (50, 100)]
    for idx, (lo, hi) in enumerate(expected_bounds):
        assert parts[idx] == DummyManifest(manifest_type, begin_id=lo, end_id=hi)
示例#4
0
def test_split_odd(manifest_type):
    """Check a three-way split of ids 0..100: the expected parts are 0..34, 34..68 and 68..100."""
    whole = DummyManifest(manifest_type, begin_id=0, end_id=100)
    parts = split(whole, num_splits=3)
    assert len(parts) == 3
    # 100 items do not divide evenly by 3, so the last expected part is the shorter one.
    expected_bounds = [(0, 34), (34, 68), (68, 100)]
    for idx, (lo, hi) in enumerate(expected_bounds):
        assert parts[idx] == DummyManifest(manifest_type, begin_id=lo, end_id=hi)
示例#5
0
def test_k2_speech_recognition_iterable_dataset_shuffling():
    """Check that a shuffling dataset read through a 2-worker DataLoader yields every cut once, in a new order.

    Three invariants are asserted on the cut ids collected over one epoch:
    the count matches the CutSet, no id is duplicated, and the order differs
    from the CutSet's own order.
    """
    # The dummy cuts have a duration of 1 second each
    cut_set = DummyManifest(CutSet, begin_id=0, end_id=100)

    dataset = K2SpeechRecognitionIterableDataset(
        cuts=cut_set,
        shuffle=True,
        # Set an effective batch size of 10 cuts, as all have 1s duration == 100 frames
        # This way we're testing that it works okay when returning multiple batches in
        # a full epoch.
        max_frames=1000
    )
    # batch_size=None: the dataset is handed to the loader without a loader-side batch size.
    dloader = DataLoader(dataset, batch_size=None, num_workers=2)

    # Only the cut ids are needed for the checks below, so the batches
    # themselves are not retained.
    dloader_cut_ids = []
    for batch in dloader:
        dloader_cut_ids.extend(batch['supervisions']['cut_id'])

    # Invariant 1: we receive the same number of items in a dataloader epoch as there were in the CutSet
    assert len(dloader_cut_ids) == len(cut_set)
    # Invariant 2: the items are not duplicated
    assert len(set(dloader_cut_ids)) == len(dloader_cut_ids)
    # Invariant 3: the items are shuffled, i.e. the order is different than that in the CutSet
    assert dloader_cut_ids != [c.id for c in cut_set]
示例#6
0
def test_add_feature_sets():
    """Check that ``+`` on FeatureSets built with ids 0..5 and 5..10 equals the one built with ids 0..10."""
    reference = DummyManifest(FeatureSet, begin_id=0, end_id=10)
    left = DummyManifest(FeatureSet, begin_id=0, end_id=5)
    right = DummyManifest(FeatureSet, begin_id=5, end_id=10)
    assert left + right == reference
示例#7
0
def test_cannot_split_to_more_chunks_than_items(manifest_type):
    """Check that asking for 10 splits of a manifest built with ids 0..1 raises ``ValueError``."""
    tiny_manifest = DummyManifest(manifest_type, begin_id=0, end_id=1)
    too_many_splits = 10
    with pytest.raises(ValueError):
        tiny_manifest.split(num_splits=too_many_splits)
示例#8
0
def test_add_audio_sets():
    """Check that ``+`` on RecordingSets built with ids 0..5 and 5..10 equals the one built with ids 0..10."""
    reference = DummyManifest(RecordingSet, begin_id=0, end_id=10)
    first_half = DummyManifest(RecordingSet, begin_id=0, end_id=5)
    second_half = DummyManifest(RecordingSet, begin_id=5, end_id=10)
    assert first_half + second_half == reference
示例#9
0
def test_add_recording_sets():
    """Check that two RecordingSets with adjacent id ranges add up to the RecordingSet built with ids 0..10."""
    reference = DummyManifest(RecordingSet, begin_id=0, end_id=10)
    head = DummyManifest(RecordingSet, begin_id=0, end_id=5)
    tail = DummyManifest(RecordingSet, begin_id=5, end_id=10)
    summed = head + tail
    assert summed == reference
示例#10
0
def test_add_supervision_sets():
    """Check that two SupervisionSets with adjacent id ranges add up to the SupervisionSet built with ids 0..10."""
    reference = DummyManifest(SupervisionSet, begin_id=0, end_id=10)
    head = DummyManifest(SupervisionSet, begin_id=0, end_id=5)
    tail = DummyManifest(SupervisionSet, begin_id=5, end_id=10)
    summed = head + tail
    assert summed == reference