示例#1
0
    def test_validate(self):
        """
        Replace the 'default' label-list of utt-4 and utt-6 with lists that
        leave parts of the utterance unlabeled, then check that
        ``LabelCoverageValidator`` fails and reports exactly those parts.
        """
        corpus = resources.create_single_label_corpus()

        # utt-4: nothing is labeled between 1.44 and 1.89.
        gapped_labels = [
            annotations.Label('a', start=0.0, end=1.44),
            annotations.Label('a', start=1.89, end=10.0),
        ]
        corpus.utterances['utt-4'].set_label_list(
            annotations.LabelList(idx='default', labels=gapped_labels))

        # utt-6: the labels are contiguous but only span 1.33 to 14.7.
        # The expected segments below imply the utterance runs from 0.0 to
        # 15.0 -- presumably its duration in the test corpus.
        trimmed_labels = [
            annotations.Label('a', start=1.33, end=5.9),
            annotations.Label('a', start=5.9, end=14.7),
        ]
        corpus.utterances['utt-6'].set_label_list(
            annotations.LabelList(idx='default', labels=trimmed_labels))

        outcome = validation.LabelCoverageValidator('default').validate(corpus)

        assert not outcome.passed

        uncovered = outcome.uncovered_segments
        assert set(uncovered.keys()) == {'utt-4', 'utt-6'}
        assert uncovered['utt-4'] == [(1.44, 1.89)]
        assert uncovered['utt-6'] == [(0.0, 1.33), (14.7, 15.0)]
示例#2
0
    def test_validate_returns_part_of_overlapping_label(self):
        """
        Give utt-4 and utt-6 'default' labels that reach beyond the
        utterance, then check that ``LabelOverflowValidator`` fails and
        reports only the overflowing part of each offending label.
        """
        corpus = resources.create_single_label_corpus()

        # utt-4: label 'b' ends at 13.0; the expected overflow starts at 10.0.
        late_labels = [
            annotations.Label('a', start=0.0, end=9.0),
            annotations.Label('b', start=9.0, end=13.0),
        ]
        corpus.utterances['utt-4'].set_label_list(
            annotations.LabelList(idx='default', labels=late_labels))

        # utt-6: label 'a' starts at -2.0; the expected overflow ends at 0.0.
        early_labels = [
            annotations.Label('a', start=-2.0, end=5.9),
            annotations.Label('b', start=5.9, end=14.7),
        ]
        corpus.utterances['utt-6'].set_label_list(
            annotations.LabelList(idx='default', labels=early_labels))

        outcome = validation.LabelOverflowValidator('default').validate(corpus)

        assert not outcome.passed

        overflow = outcome.overflow_segments
        assert set(overflow.keys()) == {'utt-4', 'utt-6'}
        assert overflow['utt-4'] == [(10.0, 13.0, 'b')]
        assert overflow['utt-6'] == [(-2.0, 0.0, 'a')]
示例#3
0
def corpus_with_more_labels():
    """
    Build the single-label corpus and attach an extra 'radio' label-list
    to the utterances utt-1 through utt-8.

    utt-1 and utt-2 get a single 'alpha' label; utt-3 to utt-8 get a single
    'beta' label.
    """
    corpus = resources.create_single_label_corpus()

    # (utterance id, value of its single 'radio' label), in assignment order.
    radio_values = (
        ('utt-1', 'alpha'),
        ('utt-2', 'alpha'),
        ('utt-3', 'beta'),
        ('utt-4', 'beta'),
        ('utt-5', 'beta'),
        ('utt-6', 'beta'),
        ('utt-7', 'beta'),
        ('utt-8', 'beta'),
    )

    for utt_idx, value in radio_values:
        utterance = corpus.utterances[utt_idx]
        # A new Label and LabelList per utterance; instances are not shared.
        utterance.set_label_list(
            assets.LabelList(idx='radio', labels=[assets.Label(value)]))

    return corpus
示例#4
0
    def test_validate_passes(self):
        """
        Check that ``LabelCoverageValidator`` passes on the unmodified
        single-label corpus and reports no uncovered segments.
        """
        corpus = resources.create_single_label_corpus()
        validator = validation.LabelCoverageValidator('default')

        outcome = validator.validate(corpus)

        assert outcome.passed
        assert len(outcome.uncovered_segments) == 0
示例#5
0
    def test_encode_corpus(self, tmpdir):
        """
        Encode the single-label corpus with ``EncoderMock`` and check the
        returned container: its path, its keys, and the data stored for
        every utterance.
        """
        corpus = resources.create_single_label_corpus()
        hdf5_path = os.path.join(tmpdir.strpath, 'data.hdf5')

        container = EncoderMock().encode_corpus(corpus, hdf5_path)

        # Every utterance is expected to hold this same array.
        expected = np.array([1, 2, 3])

        with container as opened:
            assert opened.path == hdf5_path
            assert set(opened.keys()) == set(corpus.utterances.keys())

            for utt_idx in corpus.utterances:
                stored = opened.get(utt_idx, mem_map=False)
                assert np.array_equal(stored, expected)
示例#6
0
def corpus():
    """Return the corpus built by ``resources.create_single_label_corpus``."""
    single_label_corpus = resources.create_single_label_corpus()
    return single_label_corpus