def test_validate(self):
    """LabelCoverageValidator must fail and report the unlabeled segments.

    The 'default' label-lists of utt-4 and utt-6 are replaced so that
    utt-4 has a gap between its two labels, and utt-6 starts late and
    ends before 15.0 (presumably the utterance end -- confirm against
    the corpus fixture).
    """
    corpus = resources.create_single_label_corpus()

    # (start, end) spans of the 'a' labels assigned to each utterance.
    label_spans = {
        'utt-4': [(0.0, 1.44), (1.89, 10.0)],
        'utt-6': [(1.33, 5.9), (5.9, 14.7)],
    }
    for utt_idx, spans in label_spans.items():
        labels = [
            annotations.Label('a', start=begin, end=finish)
            for begin, finish in spans
        ]
        corpus.utterances[utt_idx].set_label_list(
            annotations.LabelList(idx='default', labels=labels))

    validator = validation.LabelCoverageValidator('default')
    result = validator.validate(corpus)

    expected_uncovered = {
        'utt-4': [(1.44, 1.89)],
        'utt-6': [(0.0, 1.33), (14.7, 15.0)],
    }

    assert not result.passed
    assert set(result.uncovered_segments.keys()) == set(expected_uncovered)
    for utt_idx, segments in expected_uncovered.items():
        assert result.uncovered_segments[utt_idx] == segments
def test_validate_returns_part_of_overlapping_label(self):
    """LabelOverflowValidator must report only the overflowing part of a label.

    utt-4 gets a 'b' label ending at 13.0 and utt-6 gets an 'a' label
    starting at -2.0; the test expects just the portions (10.0, 13.0)
    and (-2.0, 0.0) to be reported, together with the label value.
    """
    corpus = resources.create_single_label_corpus()

    # (value, start, end) of the labels assigned to each utterance.
    label_specs = {
        'utt-4': [('a', 0.0, 9.0), ('b', 9.0, 13.0)],
        'utt-6': [('a', -2.0, 5.9), ('b', 5.9, 14.7)],
    }
    for utt_idx, specs in label_specs.items():
        labels = [
            annotations.Label(value, start=begin, end=finish)
            for value, begin, finish in specs
        ]
        corpus.utterances[utt_idx].set_label_list(
            annotations.LabelList(idx='default', labels=labels))

    validator = validation.LabelOverflowValidator('default')
    result = validator.validate(corpus)

    expected_overflow = {
        'utt-4': [(10.0, 13.0, 'b')],
        'utt-6': [(-2.0, 0.0, 'a')],
    }

    assert not result.passed
    assert set(result.overflow_segments.keys()) == set(expected_overflow)
    for utt_idx, segments in expected_overflow.items():
        assert result.overflow_segments[utt_idx] == segments
def corpus_with_more_labels():
    """Corpus with an extra label-list.

    Starts from the single-label corpus and attaches a 'radio'
    label-list holding one label to each of utt-1 .. utt-8:
    'alpha' for the first two utterances, 'beta' for the rest.
    """
    corpus = resources.create_single_label_corpus()

    radio_values = (
        ('utt-1', 'alpha'),
        ('utt-2', 'alpha'),
        ('utt-3', 'beta'),
        ('utt-4', 'beta'),
        ('utt-5', 'beta'),
        ('utt-6', 'beta'),
        ('utt-7', 'beta'),
        ('utt-8', 'beta'),
    )

    for utt_idx, value in radio_values:
        # A fresh LabelList/Label pair per utterance; nothing is shared.
        radio_list = assets.LabelList(idx='radio', labels=[assets.Label(value)])
        corpus.utterances[utt_idx].set_label_list(radio_list)

    return corpus
def test_validate_passes(self):
    """LabelCoverageValidator must pass on the unmodified single-label corpus."""
    corpus = resources.create_single_label_corpus()

    outcome = validation.LabelCoverageValidator('default').validate(corpus)
    uncovered = outcome.uncovered_segments

    assert outcome.passed
    assert len(uncovered) == 0
def test_encode_corpus(self, tmpdir):
    """Encoding a corpus must yield a container with one entry per utterance.

    The container returned by ``EncoderMock.encode_corpus`` is expected
    to point at the target path, to hold exactly the corpus' utterance
    ids as keys, and to return ``[1, 2, 3]`` for every utterance.
    """
    corpus = resources.create_single_label_corpus()
    target_path = os.path.join(tmpdir.strpath, 'data.hdf5')

    container = EncoderMock().encode_corpus(corpus, target_path)
    expected_values = np.array([1, 2, 3])

    with container as ct:
        assert ct.path == target_path
        assert set(ct.keys()) == set(corpus.utterances.keys())

        for utt_idx in corpus.utterances:
            stored = ct.get(utt_idx, mem_map=False)
            assert np.array_equal(stored, expected_values)
def corpus():
    """Return a freshly created single-label corpus."""
    single_label_corpus = resources.create_single_label_corpus()
    return single_label_corpus