def test_filtered_cmu_entries(self):
    """Check the first two entries returned by filter_entries for 'Austen'.

    The CMU dictionary entries are loaded, passed through
    ``filter_entries`` with ``'Austen'``, and the first two results are
    compared against the expected (word, phones) pairs.
    """
    expected_head = [(u'a', [u'AH0']), (u'a', [u'EY1'])]
    filtered = filter_entries(entries_from_cmudict(), 'Austen')
    self.assertEqual(filtered[:2], expected_head)
def _generate_words_letters(number_of_words=100, order=1, max_transitions=30,
                            words_cache=None):
    """Generate new words letter by letter and store them in a words cache.

    The letter sequences of the CMU dictionary entries kept by
    ``filter_entries(entries, 'Austen')`` are fed to
    ``generate_markov_tree``. Candidate words are then produced with
    ``generate_new_sequence`` until ``number_of_words`` candidates have been
    accepted. A candidate is rejected when it is already a word in the
    unfiltered CMU dictionary or is already returned by
    ``words_cache.get_words('letters')``.

    Note that the loop has no attempt limit: it only ends once enough
    candidates have been accepted.

    Args:
        number_of_words: Number of new words to accept before returning.
        order: Forwarded to ``generate_markov_tree`` as ``order``.
        max_transitions: Forwarded to ``generate_new_sequence`` as its
            second argument.
        words_cache: Cache that receives the accepted words. When ``None``,
            a fresh ``WordsCache()`` is created.

    Returns:
        The words cache (the one passed in, or the newly created one) after
        the accepted words were added under the ``'letters'`` key.
    """
    if words_cache is None:
        words_cache = WordsCache()

    entries = entries_from_cmudict()
    # Built once as a set: every candidate is tested against the whole
    # dictionary, and a list would make each test a linear scan.
    existing_words = set(word for word, _ in entries)

    entries = filter_entries(entries, 'Austen')
    sequences = [list(word) for word, _ in entries]
    markov_tree = generate_markov_tree(sequences, order=order)

    number_generated = 0
    while number_generated < number_of_words:
        # Queried on every pass, as in the original; presumably so words
        # added earlier in this call are taken into account (depends on
        # WordsCache, which is not visible here).
        cached_words = words_cache.get_words('letters')
        new_sequence = generate_new_sequence(markov_tree, max_transitions)
        new_word = ''.join(new_sequence)

        # Reject words already in the corpus or already cached.
        if new_word in existing_words or new_word in cached_words:
            continue

        words_cache.add_word('letters', new_word)
        # Single-argument parenthesised form prints the same text on
        # Python 2 and is also valid on Python 3.
        print("New word: %s" % new_word)
        number_generated += 1

    return words_cache
def _generate_words_tuples(number_of_words=100, tuple_length=2, words_cache=None):
    """Generate new words by concatenating randomly chosen existing words.

    Words are drawn with ``random.choice`` from the CMU dictionary entries
    kept by ``filter_entries(entries, 'Austen')``; ``tuple_length`` of them
    are joined (without a separator) to form each candidate. Candidates are
    produced until ``number_of_words`` of them have been accepted. A
    candidate is rejected when it is already returned by
    ``words_cache.get_words('tuples')`` or is already a word in the
    unfiltered CMU dictionary.

    Note that the loop has no attempt limit: it only ends once enough
    candidates have been accepted.

    Args:
        number_of_words: Number of new words to accept before returning.
        tuple_length: How many existing words are joined per candidate.
        words_cache: Cache that receives the accepted words. When ``None``,
            a fresh ``WordsCache()`` is created.

    Returns:
        The words cache (the one passed in, or the newly created one) after
        the accepted words were added under the ``'tuples'`` key.
    """
    if words_cache is None:
        words_cache = WordsCache()

    entries = entries_from_cmudict()
    # Built once as a set: every candidate is tested against the whole
    # dictionary, and a list would make each test a linear scan.
    existing_words = set(word for word, _ in entries)

    entries = filter_entries(entries, 'Austen')
    entry_words = [entry[0] for entry in entries]

    number_generated = 0
    while number_generated < number_of_words:
        # Queried on every pass, as in the original; presumably so words
        # added earlier in this call are taken into account (depends on
        # WordsCache, which is not visible here).
        cached_words = words_cache.get_words('tuples')
        new_word = ''.join(
            random.choice(entry_words) for _ in range(tuple_length)
        )

        # Reject words already cached or already in the corpus.
        if new_word in cached_words or new_word in existing_words:
            continue

        words_cache.add_word('tuples', new_word)
        # Single-argument parenthesised form prints the same text on
        # Python 2 and is also valid on Python 3.
        print("New word: %s" % new_word)
        number_generated += 1

    return words_cache
def test_words_correct(self):
    """Test the accuracy of the spelling against existing words.

    For every CMU dictionary entry kept by
    ``filter_entries(entries, 'Austen')``, the phones are passed through
    ``_clean_phones`` and ``phones_to_word``, and the result is compared
    with the entry's word. Mismatches are printed, and the number of exact
    matches is asserted to equal 1001.
    """
    entries = filter_entries(entries_from_cmudict(), 'Austen')

    number_correct = 0
    for word, phones in entries:
        # NOTE(review): the original comment here says the cleaning step
        # modifies ``phones`` in-place. If so, the ``phones`` printed below
        # on a mismatch is the cleaned list rather than the dictionary's
        # original one -- confirm against _clean_phones.
        cleaned_phones = _clean_phones(phones)
        calculated_word = phones_to_word(cleaned_phones)

        if word == calculated_word:
            number_correct += 1
        else:
            # Single-argument parenthesised form prints the same text on
            # Python 2 and is also valid on Python 3.
            print("Incorrect spelling. Expected %s, got %s. Phones: %s" % (
                word, calculated_word, phones
            ))

    self.assertEqual(number_correct, 1001)
def test_entries_from_cmudict(self):
    """Check the first two (word, phones) pairs from entries_from_cmudict."""
    expected_head = [(u'a', [u'AH0']), (u'a.', [u'EY1'])]
    first_two = entries_from_cmudict()[:2]
    self.assertEqual(first_two, expected_head)
def test_entries_from_cmudict(self):
    """Verify that the unfiltered CMU entries start with the expected pairs.

    Only the first two items returned by ``entries_from_cmudict`` are
    compared.
    """
    loaded = entries_from_cmudict()
    leading_pairs = loaded[:2]
    self.assertEqual(
        leading_pairs,
        [
            (u'a', [u'AH0']),
            (u'a.', [u'EY1']),
        ]
    )
def test_filtered_cmu_entries(self):
    """Verify the leading entries after filtering the CMU entries.

    The entries from ``entries_from_cmudict`` are passed to
    ``filter_entries`` together with ``'Austen'``; only the first two
    results are compared.
    """
    all_entries = entries_from_cmudict()
    kept_entries = filter_entries(all_entries, 'Austen')
    leading_pairs = kept_entries[:2]
    self.assertEqual(
        leading_pairs,
        [
            (u'a', [u'AH0']),
            (u'a', [u'EY1']),
        ]
    )