Exemplo n.º 1
0
    def test_multiple_medial_pauses(self):
        # Two back-to-back pauses sit between the fifth word and the trailing
        # 'mat' (words[5] and words[6] are pauses, words[7] is 'mat'). The
        # test expects two utterances holding only the surrounding words.
        words = self.words[:]
        words[5:] = [
            Pause('<SIL>', 0.91, 1.0),
            Pause('<SIL>', 1.0, 1.42),
            Word('mat', 1.42, 1.7, ['m', 'ae', 't'], ['m', 'ae', 't'])
        ]

        utterances = list(words_to_utterances(words))

        assert_equal(len(utterances), 2)
        # Pin the utterance sizes: the element-wise loops below only visit
        # items that are present, so a truncated utterance would otherwise
        # pass unnoticed.
        assert_equal(len(utterances[0]), 5)
        assert_equal(len(utterances[1]), 1)

        # The first utterance must line up with the leading words.
        for i, word in enumerate(utterances[0]):
            assert_equal(words[i].beg, word.beg)
            assert_equal(words[i].end, word.end)
            assert_equal(words[i].orthography, word.orthography)
            assert_equal(words[i].phonemic, word.phonemic)
            assert_equal(words[i].phonetic, word.phonetic)

        # The second utterance is compared starting at words[7], the word
        # that follows both pauses.
        for i, word in enumerate(utterances[1], 7):
            assert_equal(words[i].beg, word.beg)
            assert_equal(words[i].end, word.end)
            assert_equal(words[i].orthography, word.orthography)
            assert_equal(words[i].phonemic, word.phonemic)
            assert_equal(words[i].phonetic, word.phonetic)
Exemplo n.º 2
0
    def test_sep_arg(self):
        # A single pause (0.91-1.0) precedes the final word. With sep=0.08
        # the test expects two utterances of 5 and 1 items, i.e. the pause
        # itself appears in neither of them.
        fields = ('beg', 'end', 'orthography', 'phonemic', 'phonetic')

        words = self.words[:]
        words[5:] = [
            Pause('<SIL>', 0.91, 1.0),
            Word('mat', 1.0, 1.28, ['m', 'ae', 't'], ['m', 'ae', 't']),
        ]

        utterances = list(words_to_utterances(words, sep=0.08))

        assert_equal(len(utterances), 2)
        first, second = utterances
        assert_equal(len(first), 5)
        assert_equal(len(second), 1)

        # Leading words must be carried over unchanged.
        for idx, actual in enumerate(first):
            expected = words[idx]
            for name in fields:
                assert_equal(getattr(expected, name), getattr(actual, name))

        # words[6] is the 'mat' that follows the pause.
        expected, actual = words[6], second[0]
        for name in fields:
            assert_equal(getattr(expected, name), getattr(actual, name))
Exemplo n.º 3
0
def prep_BUCKEYE(speaker_list):
    """Collect durations and per-utterance word annotations from speakers.

    Args:
        speaker_list: iterable of locations, each handed to
            ``buckeye.Speaker.from_zip``.

    Returns:
        An 8-tuple ``(track_durations, word_durations, utterance_durations,
        phone_durations, all_words, all_phonemic, all_phonetic, all_pos)``.
        The four duration entries are flat lists pooled over all speakers.
        The other four are nested as ``[speaker][utterance][item]`` and hold
        the ``orthography``, ``phonemic``, ``phonetic`` and ``pos`` values of
        every utterance item that has a ``phonemic`` attribute. ``None``
        transcriptions are skipped, so the phonemic/phonetic lists may be
        shorter than the matching word list.
    """
    track_durations, word_durations = [], []
    utterance_durations, phone_durations = [], []
    all_words, all_phonemic, all_phonetic, all_pos = [], [], [], []

    for speaker_loc in tqdm(speaker_list):
        spk_words, spk_phonemic, spk_phonetic, spk_pos = [], [], [], []
        speaker = buckeye.Speaker.from_zip(speaker_loc, load_wavs=True)

        # Materialise the tracks so the inner progress bar gets a sized list.
        tracks = list(speaker)
        for track in tqdm(tracks, leave=False):
            last_word = track.words[-1]
            first_word = track.words[0]
            track_durations.append(last_word.end - first_word.beg)

            for utterance in words_to_utterances(track.words):
                utterance_durations.append(utterance.dur)
                utt_words, utt_phonemic, utt_phonetic, utt_pos = [], [], [], []

                for item in utterance:
                    # Items lacking a "phonemic" attribute (pauses, etc.)
                    # contribute nothing.
                    if not hasattr(item, "phonemic"):
                        continue

                    utt_words.append(item.orthography)
                    word_durations.append(item.dur)

                    has_phonemic = item.phonemic is not None
                    if has_phonemic:
                        utt_phonemic.append(item.phonemic)
                    if item.phonetic is not None:
                        utt_phonetic.append(item.phonetic)
                    # Phone durations are only counted for items that carry
                    # a phonemic transcription.
                    if has_phonemic:
                        for phone in item.phones:
                            phone_durations.append(phone.dur)
                    utt_pos.append(item.pos)

                spk_words.append(utt_words)
                spk_phonemic.append(utt_phonemic)
                spk_phonetic.append(utt_phonetic)
                spk_pos.append(utt_pos)

        all_words.append(spk_words)
        all_phonemic.append(spk_phonemic)
        all_phonetic.append(spk_phonetic)
        all_pos.append(spk_pos)

    return (
        track_durations,
        word_durations,
        utterance_durations,
        phone_durations,
        all_words,
        all_phonemic,
        all_phonetic,
        all_pos,
    )
Exemplo n.º 4
0
    def test_final_pause_no_strip(self):
        # With strip_pauses=False, a pause appended after the last word is
        # expected to remain in the single resulting utterance (7 items).
        words = self.words + [Pause('<SIL>', 1.19, 1.25)]

        utterances = list(words_to_utterances(words, strip_pauses=False))

        assert_equal(len(utterances), 1)
        only = utterances[0]
        assert_equal(len(only), 7)

        # Every item, pause included, keeps its original time span.
        for idx, item in enumerate(only):
            expected = words[idx]
            assert_equal(expected.beg, item.beg)
            assert_equal(expected.end, item.end)
Exemplo n.º 5
0
    def test_initial_pause_no_strip(self):
        # With strip_pauses=False, a pause placed before the first word is
        # expected to remain in the single resulting utterance (7 items).
        words = [Pause('<SIL>', 0.0, 0.05)] + self.words

        utterances = list(words_to_utterances(words, strip_pauses=False))

        assert_equal(len(utterances), 1)
        only = utterances[0]
        assert_equal(len(only), 7)

        # Every item, pause included, keeps its original time span.
        for idx, item in enumerate(only):
            for name in ('beg', 'end'):
                assert_equal(getattr(words[idx], name), getattr(item, name))
Exemplo n.º 6
0
    def test_short_medial_pause(self):
        # A pause from 0.91 to 1.0 is inserted before the final word; the
        # test expects one utterance that still holds the pause at index 5.
        pause = Pause('<SIL>', 0.91, 1.0)
        mat = Word('mat', 1.0, 1.28, ['m', 'ae', 't'], ['m', 'ae', 't'])
        words = self.words[:]
        words[5:] = [pause, mat]

        utterances = list(words_to_utterances(words))

        assert_equal(len(utterances), 1)
        assert_equal(words[5].entry, utterances[0][5].entry)

        # Time spans must be preserved item by item.
        for idx, item in enumerate(utterances[0]):
            expected = words[idx]
            assert_equal(expected.beg, item.beg)
            assert_equal(expected.end, item.end)
Exemplo n.º 7
0
    def test_short_medial_pause(self):
        # The final word is replaced by a pause (0.91-1.0) followed by 'mat'.
        # The test expects a single utterance with the pause kept at index 5.
        words = self.words[:]
        replacement = [
            Pause('<SIL>', 0.91, 1.0),
            Word('mat', 1.0, 1.28, ['m', 'ae', 't'], ['m', 'ae', 't']),
        ]
        words[5:] = replacement

        utterances = list(words_to_utterances(words))

        assert_equal(len(utterances), 1)
        assert_equal(words[5].entry, utterances[0][5].entry)

        # Compare begin/end times of each item with the input list.
        for idx, item in enumerate(utterances[0]):
            for name in ('beg', 'end'):
                assert_equal(getattr(words[idx], name), getattr(item, name))
Exemplo n.º 8
0
    def test_multiple_short_medial_pauses(self):
        # Two separate pauses are placed at indices 3 and 6 of the word list.
        # The test expects one utterance that keeps both pauses in place.
        vocnoise = Pause('<VOCNOISE>', 0.59, 0.65)
        on = Word('on', 0.65, 0.77, ['aa', 'n'], ['aa', 'n'])
        silence = Pause('<SIL>', 0.91, 1.35)
        mat = Word('mat', 1.35, 1.63, ['m', 'ae', 't'], ['m', 'ae', 't'])

        words = self.words[:]
        words[3:] = [vocnoise, on, self.words[4], silence, mat]

        utterances = list(words_to_utterances(words))

        assert_equal(len(utterances), 1)
        for pause_idx in (3, 6):
            assert_equal(words[pause_idx].entry,
                         utterances[0][pause_idx].entry)

        # Time spans must be preserved item by item.
        for idx, item in enumerate(utterances[0]):
            expected = words[idx]
            assert_equal(expected.beg, item.beg)
            assert_equal(expected.end, item.end)
Exemplo n.º 9
0
    def test_medial_pause_no_strip(self):
        # Work on a copy of the word list and replace the final word with a
        # pause (0.91-1.42) followed by 'mat'. With strip_pauses=False the
        # test expects utterances of 6 and 1 items, so the pause stays at
        # the end of the first utterance.
        words = self.words[:]
        words[5:] = [
            Pause('<SIL>', 0.91, 1.42),
            Word('mat', 1.42, 1.7, ['m', 'ae', 't'], ['m', 'ae', 't']),
        ]

        utterances = list(words_to_utterances(words, strip_pauses=False))

        assert_equal(len(utterances), 2)
        expected_sizes = (6, 1)
        for utt_idx, size in enumerate(expected_sizes):
            assert_equal(len(utterances[utt_idx]), size)

        # Only the first utterance is checked element by element.
        for idx, item in enumerate(utterances[0]):
            expected = words[idx]
            assert_equal(expected.beg, item.beg)
            assert_equal(expected.end, item.end)
Exemplo n.º 10
0
    def test_multiple_short_medial_pauses(self):
        # The tail of the word list is rebuilt so that a <VOCNOISE> pause
        # sits at index 3 and a <SIL> pause at index 6. The test expects a
        # single utterance in which both pauses are still present.
        words = self.words[:]
        tail = [
            Pause('<VOCNOISE>', 0.59, 0.65),
            Word('on', 0.65, 0.77, ['aa', 'n'], ['aa', 'n']),
            self.words[4],
            Pause('<SIL>', 0.91, 1.35),
            Word('mat', 1.35, 1.63, ['m', 'ae', 't'], ['m', 'ae', 't']),
        ]
        words[3:] = tail

        utterances = list(words_to_utterances(words))

        assert_equal(len(utterances), 1)
        assert_equal(words[3].entry, utterances[0][3].entry)
        assert_equal(words[6].entry, utterances[0][6].entry)

        # Compare begin/end times of each item with the input list.
        for idx, item in enumerate(utterances[0]):
            for name in ('beg', 'end'):
                assert_equal(getattr(words[idx], name), getattr(item, name))
Exemplo n.º 11
0
    def test_multiple_medial_pauses_no_strip(self):
        # Two back-to-back pauses are inserted before the final 'mat'
        # (words[5], words[6] are pauses, words[7] is 'mat'). With
        # strip_pauses=False the test expects utterances of 7 and 1 items,
        # so both pauses stay in the first utterance.
        words = self.words[:]
        words[5:] = [
            Pause('<SIL>', 0.91, 1.0),
            Pause('<SIL>', 1.0, 1.42),
            Word('mat', 1.42, 1.7, ['m', 'ae', 't'], ['m', 'ae', 't']),
        ]

        utterances = list(words_to_utterances(words, strip_pauses=False))

        assert_equal(len(utterances), 2)
        assert_equal(len(utterances[0]), 7)
        assert_equal(len(utterances[1]), 1)

        # Each utterance is compared against the slice of `words` starting
        # at the given offset: 0 for the first, 7 for the second.
        for utt_idx, offset in ((0, 0), (1, 7)):
            for idx, item in enumerate(utterances[utt_idx], offset):
                expected = words[idx]
                assert_equal(expected.beg, item.beg)
                assert_equal(expected.end, item.end)
Exemplo n.º 12
0
    def test_medial_pause(self):
        # Work on a copy of the word list and replace the final word with a
        # pause (0.91-1.42) followed by 'mat'. The test expects utterances
        # of 5 and 1 items, i.e. the pause appears in neither.
        pause = Pause('<SIL>', 0.91, 1.42)
        mat = Word('mat', 1.42, 1.7, ['m', 'ae', 't'], ['m', 'ae', 't'])
        words = self.words[:]
        words[5:] = [pause, mat]

        utterances = list(words_to_utterances(words))

        assert_equal(len(utterances), 2)
        assert_equal(len(utterances[0]), 5)
        assert_equal(len(utterances[1]), 1)

        # Leading words keep their time spans.
        for idx, item in enumerate(utterances[0]):
            expected = words[idx]
            assert_equal(expected.beg, item.beg)
            assert_equal(expected.end, item.end)

        # words[6] is the 'mat' after the pause; it must be the sole item
        # of the second utterance, unchanged.
        expected, actual = words[6], utterances[1][0]
        for name in ('beg', 'end', 'orthography', 'phonemic', 'phonetic'):
            assert_equal(getattr(expected, name), getattr(actual, name))
Exemplo n.º 13
0
    def test_multiple_medial_pauses(self):
        # Two back-to-back pauses sit between the fifth word and the trailing
        # 'mat' (words[5] and words[6] are pauses, words[7] is 'mat'). The
        # test expects two utterances holding only the surrounding words.
        words = self.words[:]
        words[5:] = [Pause('<SIL>', 0.91, 1.0),
                     Pause('<SIL>', 1.0, 1.42),
                     Word('mat', 1.42, 1.7, ['m', 'ae', 't'], ['m', 'ae', 't'])]

        utterances = list(words_to_utterances(words))

        assert_equal(len(utterances), 2)
        # Pin the utterance sizes: the element-wise loops below only visit
        # items that are present, so a truncated utterance would otherwise
        # pass unnoticed.
        assert_equal(len(utterances[0]), 5)
        assert_equal(len(utterances[1]), 1)

        # The first utterance must line up with the leading words.
        for i, word in enumerate(utterances[0]):
            assert_equal(words[i].beg, word.beg)
            assert_equal(words[i].end, word.end)
            assert_equal(words[i].orthography, word.orthography)
            assert_equal(words[i].phonemic, word.phonemic)
            assert_equal(words[i].phonetic, word.phonetic)

        # The second utterance is compared starting at words[7], the word
        # that follows both pauses.
        for i, word in enumerate(utterances[1], 7):
            assert_equal(words[i].beg, word.beg)
            assert_equal(words[i].end, word.end)
            assert_equal(words[i].orthography, word.orthography)
            assert_equal(words[i].phonemic, word.phonemic)
            assert_equal(words[i].phonetic, word.phonetic)
Exemplo n.º 14
0
    def test_medial_pause(self):
        # On a copy of the word list, swap the final word for a pause
        # (0.91-1.42) followed by 'mat'. The test expects two utterances of
        # 5 and 1 items, i.e. the pause appears in neither.
        words = self.words[:]
        replacement = [
            Pause('<SIL>', 0.91, 1.42),
            Word('mat', 1.42, 1.7, ['m', 'ae', 't'], ['m', 'ae', 't']),
        ]
        words[5:] = replacement

        utterances = list(words_to_utterances(words))

        assert_equal(len(utterances), 2)
        for utt_idx, size in enumerate((5, 1)):
            assert_equal(len(utterances[utt_idx]), size)

        # Leading words keep their time spans.
        for idx, item in enumerate(utterances[0]):
            for name in ('beg', 'end'):
                assert_equal(getattr(words[idx], name), getattr(item, name))

        # The lone item of the second utterance must match words[6], the
        # 'mat' placed after the pause.
        trailing = utterances[1][0]
        assert_equal(words[6].beg, trailing.beg)
        assert_equal(words[6].end, trailing.end)
        assert_equal(words[6].orthography, trailing.orthography)
        assert_equal(words[6].phonemic, trailing.phonemic)
        assert_equal(words[6].phonetic, trailing.phonetic)
Exemplo n.º 15
0
    def test_sep_arg(self):
        # The final word is replaced by a pause (0.91-1.0) followed by 'mat'.
        # Called with sep=0.08, the test expects utterances of 5 and 1 items,
        # i.e. the pause itself appears in neither.
        pause = Pause('<SIL>', 0.91, 1.0)
        mat = Word('mat', 1.0, 1.28, ['m', 'ae', 't'], ['m', 'ae', 't'])
        words = self.words[:]
        words[5:] = [pause, mat]

        utterances = list(words_to_utterances(words, sep=0.08))

        assert_equal(len(utterances), 2)
        assert_equal(len(utterances[0]), 5)
        assert_equal(len(utterances[1]), 1)

        compared = ('beg', 'end', 'orthography', 'phonemic', 'phonetic')

        # Leading words must be carried over unchanged.
        for idx, item in enumerate(utterances[0]):
            for name in compared:
                assert_equal(getattr(words[idx], name), getattr(item, name))

        # words[6] is the 'mat' after the pause.
        trailing = utterances[1][0]
        for name in compared:
            assert_equal(getattr(words[6], name), getattr(trailing, name))
Exemplo n.º 16
0
    def test_words_to_utterances(self):
        # Generator test: yields the checker and its argument as a pair.
        # NOTE(review): presumably consumed by a nose-style generator-test
        # runner -- confirm.
        result = list(words_to_utterances(self.words))
        yield (self.check_expected, result)
Exemplo n.º 17
0
    def test_final_pause(self):
        # Append a pause after the last word, then hand the resulting
        # utterances to check_expected via a yielded (callable, arg) pair.
        words = self.words + [Pause('<SIL>', 1.19, 1.25)]
        result = list(words_to_utterances(words))
        yield (self.check_expected, result)
Exemplo n.º 18
0
    def test_final_pause(self):
        # A trailing pause (1.19-1.25) is added to the word list; the
        # utterances are yielded together with check_expected as the checker.
        trailing = Pause('<SIL>', 1.19, 1.25)
        with_pause = self.words + [trailing]
        found = list(words_to_utterances(with_pause))

        yield (self.check_expected, found)
Exemplo n.º 19
0
    def test_initial_pause(self):
        # Put a pause in front of the first word, then hand the resulting
        # utterances to check_expected via a yielded (callable, arg) pair.
        words = [Pause('<SIL>', 0.0, 0.05)] + self.words
        result = list(words_to_utterances(words))
        yield (self.check_expected, result)
Exemplo n.º 20
0
    def test_words_to_utterances(self):
        # Convert the unmodified fixture words and yield the outcome together
        # with check_expected as the checker to apply to it.
        found = list(words_to_utterances(self.words))

        yield (self.check_expected, found)
Exemplo n.º 21
0
    def test_initial_pause(self):
        # A leading pause (0.0-0.05) is prepended to the word list; the
        # utterances are yielded together with check_expected as the checker.
        leading = Pause('<SIL>', 0.0, 0.05)
        with_pause = [leading] + self.words
        found = list(words_to_utterances(with_pause))

        yield (self.check_expected, found)