def test_more_complicated_example(self):
    """Edit steps computed for the x8/y8 fixtures must equal ``correct_editstep8``."""
    # alignment() returns a pair; only its first element is checked here.
    actual_editsteps, _ = SequenceAlignment(self.x8, self.y8).alignment()
    self.assertEqual(self.correct_editstep8, actual_editsteps)
def test_findsolution_simplecase(self):
    """Solution computed for the x9/y9 fixtures must equal ``correct_solution9``."""
    # alignment() returns a pair; only its second element is checked here.
    _, found_solution = SequenceAlignment(self.x9, self.y9).alignment()
    self.assertEqual(self.correct_solution9, found_solution)
def test_x_longer_than_y(self):
    """Edit steps computed for the x6/y6 fixtures must equal ``correct_editstep6``."""
    # alignment() returns a pair; only its first element is checked here.
    actual_editsteps, _ = SequenceAlignment(self.x6, self.y6).alignment()
    self.assertEqual(self.correct_editstep6, actual_editsteps)
def test_y_longer_than_x(self):
    """Edit steps computed for the x7/y7 fixtures must equal ``correct_editstep7``."""
    # alignment() returns a pair; only its first element is checked here.
    actual_editsteps, _ = SequenceAlignment(self.x7, self.y7).alignment()
    self.assertEqual(self.correct_editstep7, actual_editsteps)
def test_insert_elements(self):
    """Edit steps computed for the x4/y4 fixtures must equal ``correct_editstep4``."""
    # alignment() returns a pair; only its first element is checked here.
    actual_editsteps, _ = SequenceAlignment(self.x4, self.y4).alignment()
    self.assertEqual(self.correct_editstep4, actual_editsteps)
def test_remove_insert_align(self):
    """Edit steps computed for the x5/y5 fixtures must equal ``correct_editstep5``."""
    # alignment() returns a pair; only its first element is checked here.
    actual_editsteps, _ = SequenceAlignment(self.x5, self.y5).alignment()
    self.assertEqual(self.correct_editstep5, actual_editsteps)
def test_remove_to_empty(self):
    """Edit steps computed for the x3/y3 fixtures must equal ``correct_editstep3``."""
    # alignment() returns a pair; only its first element is checked here.
    actual_editsteps, _ = SequenceAlignment(self.x3, self.y3).alignment()
    self.assertEqual(self.correct_editstep3, actual_editsteps)
def test_remove(self):
    """Edit steps computed for the x2/y2 fixtures must equal ``correct_editstep2``."""
    # alignment() returns a pair; only its first element is checked here.
    actual_editsteps, _ = SequenceAlignment(self.x2, self.y2).alignment()
    self.assertEqual(self.correct_editstep2, actual_editsteps)
def test_simplecase(self):
    """Edit steps computed for the x1/y1 fixtures must equal ``correct_editstep1``."""
    # alignment() returns a pair; only its first element is checked here.
    actual_editsteps, _ = SequenceAlignment(self.x1, self.y1).alignment()
    self.assertEqual(self.correct_editstep1, actual_editsteps)
def test_findsolution_empty_x(self):
    """Solution computed for the x11/y11 fixtures must equal ``correct_solution11``."""
    # alignment() returns a pair; only its second element is checked here.
    _, found_solution = SequenceAlignment(self.x11, self.y11).alignment()
    self.assertEqual(self.correct_solution11, found_solution)
] return stressed_phoneme_chunks, stressed_phoneme # Load the aligned grapheme/phoneme pairs with open( '../data/g2p_alignment/m2m_preprocessed_cmudict.txt.m-mAlign.2-2.delX.1-best.conYX.align' ) as infile: aligned_grapheme_phoneme_pairs = [ line.strip().split('\t') for line in infile.readlines() ] # Use the aligned grapheme/phoneme pairs to create a PronunciationDictionary word_list = [] for m2m_grapheme, m2m_phoneme in aligned_grapheme_phoneme_pairs: grapheme_chunks = m2m_grapheme_to_grapheme_chunks(m2m_grapheme) grapheme = grapheme_chunks_to_grapheme_string(grapheme_chunks) phoneme_chunks = m2m_phoneme_to_phoneme_chunks(m2m_phoneme) stressed_phoneme_chunks, stressed_phoneme = phoneme_chunks_to_stressed_phoneme_chunks( phoneme_chunks, grapheme) grapheme_phoneme_aligment = SequenceAlignment(grapheme_chunks, stressed_phoneme_chunks) new_word = Word(grapheme, stressed_phoneme, grapheme_phoneme_aligment) word_list.append(new_word) # Save the PronunciationDictionary PronunciationDictionary(word_list).save('../data/pronunciation_dictionary.pkl')