示例#1
0
def _generate_words_letters(number_of_words=100,
                            order=1,
                            max_transitions=30,
                            words_cache=None):
    """Generate new words letter by letter and store them in a words cache.

    A Markov tree is built from the letter sequences of the CMUdict entries
    returned by ``filter_entries(entries, 'Austen')``.  Sequences are then
    drawn from that tree until ``number_of_words`` candidates have been
    accepted.  A candidate is rejected when it already appears among the
    unfiltered CMUdict words or among the words already cached under the
    ``'letters'`` key.

    NOTE(review): the loop only finishes once enough candidates have been
    accepted; if the tree cannot yield that many unseen words it will not
    terminate.

    Args:
        number_of_words: Number of new words to add before returning.
        order: Forwarded to ``generate_markov_tree`` as ``order``.
        max_transitions: Forwarded to ``generate_new_sequence`` as its
            second argument.
        words_cache: Cache to add words to; a fresh ``WordsCache()`` is
            created when this is ``None``.

    Returns:
        The words cache that was passed in, or the newly created one.
    """
    if words_cache is None:
        words_cache = WordsCache()
    entries = entries_from_cmudict()
    # Built once as a set so the rejection test inside the loop is a hash
    # lookup instead of a linear scan over every dictionary word.
    existing_words = set(word for word, _ in entries)
    entries = filter_entries(entries, 'Austen')
    sequences = [list(word) for word, _ in entries]
    markov_tree = generate_markov_tree(sequences, order=order)

    number_generated = 0
    while number_generated < number_of_words:
        # Re-read on every pass: the cache grows as words are accepted.
        cached_words = words_cache.get_words('letters')
        new_sequence = generate_new_sequence(markov_tree, max_transitions)
        new_word = ''.join(new_sequence)
        # Reject words already in the corpus or already generated.
        if new_word in existing_words or new_word in cached_words:
            continue
        words_cache.add_word('letters', new_word)
        # Single-argument parenthesised form prints identically on Python 2
        # and is also valid Python 3.
        print("New word: %s" % new_word)
        number_generated += 1
    return words_cache
示例#2
0
def _generate_words_letters(number_of_words=100,
                            order=1,
                            max_transitions=30,
                            words_cache=None):
    """Generate new words letter by letter and store them in a words cache.

    A Markov tree is built from the letter sequences of the CMUdict entries
    returned by ``filter_entries(entries, 'Austen')``.  Sequences are then
    drawn from that tree until ``number_of_words`` candidates have been
    accepted.  A candidate is rejected when it already appears among the
    unfiltered CMUdict words or among the words already cached under the
    ``'letters'`` key.

    NOTE(review): the loop only finishes once enough candidates have been
    accepted; if the tree cannot yield that many unseen words it will not
    terminate.

    Args:
        number_of_words: Number of new words to add before returning.
        order: Forwarded to ``generate_markov_tree`` as ``order``.
        max_transitions: Forwarded to ``generate_new_sequence`` as its
            second argument.
        words_cache: Cache to add words to; a fresh ``WordsCache()`` is
            created when this is ``None``.

    Returns:
        The words cache that was passed in, or the newly created one.
    """
    if words_cache is None:
        words_cache = WordsCache()
    entries = entries_from_cmudict()
    # Built once as a set so the rejection test inside the loop is a hash
    # lookup instead of a linear scan over every dictionary word.
    existing_words = set(word for word, _ in entries)
    entries = filter_entries(entries, 'Austen')
    sequences = [list(word) for word, _ in entries]
    markov_tree = generate_markov_tree(sequences, order=order)

    number_generated = 0
    while number_generated < number_of_words:
        # Re-read on every pass: the cache grows as words are accepted.
        cached_words = words_cache.get_words('letters')
        new_sequence = generate_new_sequence(markov_tree, max_transitions)
        new_word = ''.join(new_sequence)
        # Reject words already in the corpus or already generated.
        if new_word in existing_words or new_word in cached_words:
            continue
        words_cache.add_word('letters', new_word)
        # Single-argument parenthesised form prints identically on Python 2
        # and is also valid Python 3.
        print("New word: %s" % new_word)
        number_generated += 1
    return words_cache
示例#3
0
 def test_first_order(self):
     # First-order tree: each state maps straight to a {next_state: 1} dict,
     # giving the single path START -> B -> UH -> STOP.
     chain = {
         'START': {'B': 1},
         'B': {'UH': 1},
         'UH': {'STOP': 1},
     }
     expected = ['B', 'UH']
     # The limit of 10 is larger than the path, so the whole path is expected
     # back, without the START and STOP markers.
     generated = generate_new_sequence(chain, 10)
     self.assertEqual(generated, expected)
示例#4
0
 def test_cutoff(self):
     # Path START -> B -> UH -> L -> STOP holds three elements, which is more
     # than the limit passed below.
     chain = {
         'START': {'B': 1},
         'B': {'UH': 1},
         'UH': {'L': 1},
         'L': {'STOP': 1},
     }
     expected = ['B']
     # With a limit of 1 only the first element should come back.
     generated = generate_new_sequence(chain, 1)
     self.assertEqual(generated, expected)
示例#5
0
 def test_cutoff(self):
     # Path START -> B -> UH -> L -> STOP holds three elements, which is more
     # than the limit passed below.
     chain = {
         'START': {'B': 1},
         'B': {'UH': 1},
         'UH': {'L': 1},
         'L': {'STOP': 1},
     }
     expected = ['B']
     # With a limit of 1 only the first element should come back.
     generated = generate_new_sequence(chain, 1)
     self.assertEqual(generated, expected)
示例#6
0
 def test_second_order(self):
     # Second-order tree: two nested key levels sit above each
     # {next_state: 1} dict (presumably the two preceding states; confirm
     # against generate_markov_tree).
     tree = {
         'START': {
             'START': {'B': 1},
             'B': {'UH': 1},
         },
         'B': {
             'UH': {'STOP': 1},
         },
         'UH': {
             'STOP': {'STOP': 1},
         },
     }
     expected = ['B', 'UH']
     # The limit of 10 is larger than the path, so the full sequence is
     # expected back, without the START and STOP markers.
     generated = generate_new_sequence(tree, 10)
     self.assertEqual(generated, expected)