Пример #1
0
    def test_validate(self):
        """Only 'utt-4' must be reported invalid after two transcripts are overridden."""
        dataset = resources.create_dataset()

        # Replace the value of the first word-transcript label of each utterance.
        overrides = (
            ('utt-3', 'max length here 11'),
            ('utt-4', 'too long here'),
        )
        for utt_idx, text in overrides:
            transcript = dataset.utterances[utt_idx].label_lists[corpus.LL_WORD_TRANSCRIPT]
            transcript[0].value = text

        # NOTE(review): the positional 10 is presumably the maximum
        # characters-per-second ratio -- confirm against the validator signature.
        validator = validation.UtteranceTranscriptionRatioValidator(10, corpus.LL_WORD_TRANSCRIPT)
        outcome = validator.validate(dataset)

        assert not outcome.passed

        flagged = outcome.invalid_utterances
        assert len(flagged) == 1
        assert 'utt-4' in flagged.keys()
Пример #2
0
    def test_validate(self):
        """Only 'utt-4' must be reported invalid after two transcripts are replaced."""
        dataset = resources.create_dataset()

        # Attach a fresh single-label word transcript to each of the two utterances.
        replacements = (
            ('utt-3', 'max length here 11'),
            ('utt-4', 'too long here'),
        )
        for utt_idx, text in replacements:
            new_transcript = annotations.LabelList.create_single(
                text, idx=corpus.LL_WORD_TRANSCRIPT)
            dataset.utterances[utt_idx].set_label_list(new_transcript)

        # NOTE(review): the positional 10 is presumably the maximum
        # characters-per-second ratio -- confirm against the validator signature.
        validator = validation.UtteranceTranscriptionRatioValidator(
            10, corpus.LL_WORD_TRANSCRIPT)
        outcome = validator.validate(dataset)

        assert not outcome.passed

        flagged = outcome.invalid_utterances
        assert len(flagged) == 1
        assert 'utt-4' in flagged.keys()
Пример #3
0
def find_invalid_character_ratios(output_path, corpus):
    """
    Return the keys of the items flagged by the character-ratio validation.

    The outcome is cached in ``invalid_character_ratio.json`` inside
    ``output_path``. When that file already exists it is loaded with
    ``read_report`` and no validation is run; otherwise the validator is
    run on ``corpus`` and its invalid items are stored with ``write_report``.
    """
    report_path = os.path.join(output_path, 'invalid_character_ratio.json')

    # Cached report available: reuse it instead of validating again.
    if os.path.isfile(report_path):
        cached = read_report(report_path)
        print('Validate character ratio - Already done')
        return cached.keys()

    print('Validate character ratio ...')
    validator = validation.UtteranceTranscriptionRatioValidator(
        max_characters_per_second=25,
        label_list_idx=audiomate.corpus.LL_WORD_TRANSCRIPT,
        num_threads=4)
    flagged = validator.validate(corpus).invalid_items
    write_report(report_path, flagged)

    return flagged.keys()
Пример #4
0
    def test_name(self):
        """The validator's name must embed the label-list index it was created with."""
        validator = validation.UtteranceTranscriptionRatioValidator(10, corpus.LL_WORD_TRANSCRIPT)

        actual = validator.name()
        expected = 'Utterance-Transcription-Ratio ({})'.format(corpus.LL_WORD_TRANSCRIPT)

        assert actual == expected