예제 #1
0
 def test_filtered_cmu_entries(self):
     """Check the first two entries left after filtering with 'Austen'."""
     filtered = filter_entries(entries_from_cmudict(), 'Austen')
     expected_head = [(u'a', [u'AH0']), (u'a', [u'EY1'])]
     self.assertEqual(filtered[:2], expected_head)
예제 #2
0
def _generate_words_letters(number_of_words=100,
                            order=1,
                            max_transitions=30,
                            words_cache=None):
    """Generate new words letter by letter from a Markov tree.

    The tree is built from the letter sequences of the CMU dictionary
    entries that remain after ``filter_entries(entries, 'Austen')``.
    A candidate is rejected when it is already a word in the unfiltered
    CMU dictionary or already stored under 'letters' in the cache.

    Note: the loop only stops once ``number_of_words`` accepted words have
    been added, so it does not terminate if the generator cannot produce
    that many unseen words.

    Args:
        number_of_words: how many new words to add to the cache.
        order: forwarded to ``generate_markov_tree`` as ``order``.
        max_transitions: forwarded to ``generate_new_sequence``.
        words_cache: cache to extend; a new ``WordsCache`` is created
            when None.

    Returns:
        The cache, with the accepted words added under 'letters'.
    """
    if words_cache is None:
        words_cache = WordsCache()
    entries = entries_from_cmudict()
    # A set keeps the per-candidate membership test cheap; the original
    # list forced a scan of the whole dictionary for every candidate.
    existing_words = set(word for word, _ in entries)
    entries = filter_entries(entries, 'Austen')
    sequences = [list(word) for word, _ in entries]
    markov_tree = generate_markov_tree(sequences, order=order)

    number_generated = 0
    while number_generated < number_of_words:
        # Re-read on every pass; presumably so that words added in earlier
        # iterations are seen -- depends on what get_words returns.
        cached_words = words_cache.get_words('letters')
        new_sequence = generate_new_sequence(markov_tree, max_transitions)
        new_word = ''.join(new_sequence)
        # Reject words already in the corpus or already generated
        if new_word in existing_words or new_word in cached_words:
            continue
        words_cache.add_word('letters', new_word)
        # Parenthesised form prints the same on Python 2 and is valid on 3
        print("New word: %s" % new_word)
        number_generated += 1
    return words_cache
예제 #3
0
def _generate_words_tuples(number_of_words=100,
                           tuple_length=2,
                           words_cache=None):
    """Generate new words by concatenating randomly chosen existing words.

    Each candidate is ``tuple_length`` words drawn with ``random.choice``
    from the CMU dictionary entries that remain after
    ``filter_entries(entries, 'Austen')``, joined without a separator.
    A candidate is rejected when it is already stored under 'tuples' in
    the cache or is already a word in the unfiltered CMU dictionary.

    Note: the loop only stops once ``number_of_words`` accepted words have
    been added, so it does not terminate if not enough unseen
    combinations can be produced.

    Args:
        number_of_words: how many new words to add to the cache.
        tuple_length: number of source words joined into each candidate.
        words_cache: cache to extend; a new ``WordsCache`` is created
            when None.

    Returns:
        The cache, with the accepted words added under 'tuples'.
    """
    if words_cache is None:
        words_cache = WordsCache()
    entries = entries_from_cmudict()
    # A set keeps the per-candidate membership test cheap; the original
    # list forced a scan of the whole dictionary for every candidate.
    existing_words = set(word for word, _ in entries)
    entries = filter_entries(entries, 'Austen')
    entry_words = [entry[0] for entry in entries]

    number_generated = 0
    while number_generated < number_of_words:
        # Re-read on every pass; presumably so that words added in earlier
        # iterations are seen -- depends on what get_words returns.
        cached_words = words_cache.get_words('tuples')
        words_tuple = [random.choice(entry_words)
                       for _ in range(tuple_length)]
        new_word = ''.join(words_tuple)
        # Reject words already generated or already in the corpus
        if new_word in cached_words or new_word in existing_words:
            continue
        words_cache.add_word('tuples', new_word)
        # Parenthesised form prints the same on Python 2 and is valid on 3
        print("New word: %s" % new_word)
        number_generated += 1
    return words_cache
예제 #4
0
def _generate_words_letters(number_of_words=100,
                            order=1,
                            max_transitions=30,
                            words_cache=None):
    """Generate new words letter by letter from a Markov tree.

    The tree is built from the letter sequences of the CMU dictionary
    entries that remain after ``filter_entries(entries, 'Austen')``.
    A candidate is rejected when it is already a word in the unfiltered
    CMU dictionary or already stored under 'letters' in the cache.

    Note: the loop only stops once ``number_of_words`` accepted words have
    been added, so it does not terminate if the generator cannot produce
    that many unseen words.

    Args:
        number_of_words: how many new words to add to the cache.
        order: forwarded to ``generate_markov_tree`` as ``order``.
        max_transitions: forwarded to ``generate_new_sequence``.
        words_cache: cache to extend; a new ``WordsCache`` is created
            when None.

    Returns:
        The cache, with the accepted words added under 'letters'.
    """
    if words_cache is None:
        words_cache = WordsCache()
    entries = entries_from_cmudict()
    # A set keeps the per-candidate membership test cheap; the original
    # list forced a scan of the whole dictionary for every candidate.
    existing_words = set(word for word, _ in entries)
    entries = filter_entries(entries, 'Austen')
    sequences = [list(word) for word, _ in entries]
    markov_tree = generate_markov_tree(sequences, order=order)

    number_generated = 0
    while number_generated < number_of_words:
        # Re-read on every pass; presumably so that words added in earlier
        # iterations are seen -- depends on what get_words returns.
        cached_words = words_cache.get_words('letters')
        new_sequence = generate_new_sequence(markov_tree, max_transitions)
        new_word = ''.join(new_sequence)
        # Reject words already in the corpus or already generated
        if new_word in existing_words or new_word in cached_words:
            continue
        words_cache.add_word('letters', new_word)
        # Parenthesised form prints the same on Python 2 and is valid on 3
        print("New word: %s" % new_word)
        number_generated += 1
    return words_cache
예제 #5
0
def _generate_words_tuples(number_of_words=100,
                           tuple_length=2,
                           words_cache=None):
    """Generate new words by concatenating randomly chosen existing words.

    Each candidate is ``tuple_length`` words drawn with ``random.choice``
    from the CMU dictionary entries that remain after
    ``filter_entries(entries, 'Austen')``, joined without a separator.
    A candidate is rejected when it is already stored under 'tuples' in
    the cache or is already a word in the unfiltered CMU dictionary.

    Note: the loop only stops once ``number_of_words`` accepted words have
    been added, so it does not terminate if not enough unseen
    combinations can be produced.

    Args:
        number_of_words: how many new words to add to the cache.
        tuple_length: number of source words joined into each candidate.
        words_cache: cache to extend; a new ``WordsCache`` is created
            when None.

    Returns:
        The cache, with the accepted words added under 'tuples'.
    """
    if words_cache is None:
        words_cache = WordsCache()
    entries = entries_from_cmudict()
    # A set keeps the per-candidate membership test cheap; the original
    # list forced a scan of the whole dictionary for every candidate.
    existing_words = set(word for word, _ in entries)
    entries = filter_entries(entries, 'Austen')
    entry_words = [entry[0] for entry in entries]

    number_generated = 0
    while number_generated < number_of_words:
        # Re-read on every pass; presumably so that words added in earlier
        # iterations are seen -- depends on what get_words returns.
        cached_words = words_cache.get_words('tuples')
        words_tuple = [random.choice(entry_words)
                       for _ in range(tuple_length)]
        new_word = ''.join(words_tuple)
        # Reject words already generated or already in the corpus
        if new_word in cached_words or new_word in existing_words:
            continue
        words_cache.add_word('tuples', new_word)
        # Parenthesised form prints the same on Python 2 and is valid on 3
        print("New word: %s" % new_word)
        number_generated += 1
    return words_cache
예제 #6
0
 def test_words_correct(self):
     """Test the accuracy of the spelling against existing words.

     Every entry left after filtering with 'Austen' is spelled back from
     its phones; mismatches are printed and the number of exact matches
     is compared against a fixed expected count.
     """
     entries = entries_from_cmudict()
     entries = filter_entries(entries, 'Austen')
     number_correct = 0
     for word, phones in entries:
         # NOTE(review): an earlier note here said the cleaning step
         # modifies `phones` in place; if that holds, the phones shown in
         # the failure message below are the cleaned ones -- TODO confirm.
         cleaned_phones = _clean_phones(phones)
         calculated_word = phones_to_word(cleaned_phones)
         if word == calculated_word:
             number_correct += 1
         else:
             # Parenthesised form prints the same on Python 2 and is
             # valid on Python 3, where the print statement is an error.
             print("Incorrect spelling. Expected %s, got %s. Phones: %s" % (
                 word, calculated_word, phones
             ))
     self.assertEqual(number_correct, 1001)
예제 #7
0
 def test_entries_from_cmudict(self):
     """Check the first two entries returned by entries_from_cmudict()."""
     expected_head = [(u'a', [u'AH0']), (u'a.', [u'EY1'])]
     self.assertEqual(entries_from_cmudict()[:2], expected_head)
예제 #8
0
 def test_entries_from_cmudict(self):
     """The unfiltered entries must start with the 'a' and 'a.' entries."""
     first_two = entries_from_cmudict()[:2]
     self.assertEqual(
         first_two,
         [(u'a', [u'AH0']), (u'a.', [u'EY1'])],
     )
예제 #9
0
 def test_filtered_cmu_entries(self):
     """The 'Austen'-filtered entries must begin with the two 'a' entries."""
     all_entries = entries_from_cmudict()
     leading_pair = filter_entries(all_entries, 'Austen')[:2]
     self.assertEqual(
         leading_pair,
         [(u'a', [u'AH0']), (u'a', [u'EY1'])],
     )