def test_next_word_is(self):
    '''Test next_word_probability for is'''
    next_words = next_word_probability(TEST_TEXT1, 'is')
    # assertEqual's third positional parameter is the failure message, not a
    # value to compare, so only (expected, actual) are passed here.
    self.assertEqual(2, len(next_words))
    self.assertEqual(2, next_words['a'])
    self.assertEqual(1, next_words['not'])

    # Convert the raw follower counts into relative probabilities.
    next_words = probability_from_count(next_words)
    self.assertEqual(TWO_THIRDS, next_words['a'])
    self.assertEqual(ONE_THIRD, next_words['not'])
def test_next_word_a(self):
    '''Check follower counts and probabilities reported for the word "a".'''
    counts = next_word_probability(TEST_TEXT1, 'a')

    # Exactly two distinct followers are expected, with these raw counts.
    self.assertEqual(2, len(counts))
    for follower, expected_count in (('test', 2), ('mess', 1)):
        self.assertEqual(expected_count, counts[follower])

    # The same followers, once the counts are turned into probabilities.
    probabilities = probability_from_count(counts)
    for follower, expected_share in (('test', TWO_THIRDS), ('mess', ONE_THIRD)):
        self.assertEqual(expected_share, probabilities[follower])
def later_words_probabilities(sample, word, distance):
    '''
    Build a nested structure of next-word probabilities starting at @word.

    @param sample: a sample of text to draw from
    @param word: a word occurring before a corrupted sequence
    @param distance: how many words later to estimate (i.e. 1 for the next
        word, 2 for the word after that)
    @returns: a list whose first element is the result of
        probability_from_count for the words following @word. When
        @distance is greater than 1, a second element is appended: a dict
        mapping each of those following words to its own list of the same
        shape, built with distance - 1.
    '''
    # Given a word, collect the relative probabilities of possible following
    # words from @sample.
    next_words = probability_from_count(next_word_probability(sample, word))
    node = [next_words]

    # For each distance beyond 1, recurse into every candidate following word
    # so callers can combine the levels, weighting by relative probability.
    if distance > 1:
        # Iterate the mapping directly: dict.iterkeys() only exists on
        # Python 2, while plain iteration yields the keys on both 2 and 3.
        node.append({
            a_word: later_words_probabilities(sample, a_word, distance - 1)
            for a_word in next_words
        })
    return node