def test_calculate_log_probabilities_empty_frequencies(self):
    """Check ``calculate_log_probabilities`` on an empty frequency table.

    The test expects the call to return 1 and to leave
    ``n_gram_log_probabilities`` equal to an empty dict.
    """
    # Arrange: a bigram trie with no recorded frequencies.
    trie = NGramTrie(2)
    trie.n_gram_frequencies = {}

    # Act
    return_code = trie.calculate_log_probabilities()

    # Assert: nothing was computed and the non-zero code is returned.
    self.assertEqual(trie.n_gram_log_probabilities, {})
    self.assertEqual(1, return_code)
def test_calculate_log_probabilities_one_bi_gram(self):
    """Check ``calculate_log_probabilities`` with a single bigram.

    The test expects the only bigram, (1, 2), to receive a
    log-probability of 0.0 and the call to return 0.
    """
    # Arrange: one bigram with an arbitrary positive frequency.
    only_bigram = (1, 2)
    trie = NGramTrie(2)
    trie.n_gram_frequencies = {only_bigram: 10}

    # Act
    return_code = trie.calculate_log_probabilities()

    # Assert
    self.assertEqual(trie.n_gram_log_probabilities[only_bigram], 0.0)
    self.assertEqual(0, return_code)
def test_top_n_grams_more(self):
    """Check ``top_n_grams`` when asked for more n-grams than are stored.

    Three bigrams are stored and 2000000 are requested; the test expects
    all three back as a tuple ordered from highest to lowest frequency.
    """
    # Arrange: three bigrams with distinct frequencies.
    trie = NGramTrie(2)
    trie.n_gram_frequencies = {
        (1, 2): 100,
        (2, 3): 123,
        (3, 4): 12345,
    }
    requested = 2000000  # far larger than the number of stored bigrams
    by_descending_frequency = ((3, 4), (2, 3), (1, 2))

    # Act
    result = trie.top_n_grams(requested)

    # Assert
    self.assertEqual(by_descending_frequency, result)
def test_calculate_log_probabilities_ideal(self):
    """Check log-probabilities for bigrams spread over two contexts.

    Bigrams (1, 2) and (1, 3) share their first element, so their expected
    values use the combined frequency 10 + 2 = 12 as the denominator.
    Bigram (2, 5) is the only one starting with 2, so its expected value
    is log(5 / 5). The call itself is expected to return 0.
    """
    # Arrange
    ngram = NGramTrie(2)
    ngram.n_gram_frequencies = {(1, 2): 10, (1, 3): 2, (2, 5): 5}
    first_prob = math.log(10 / 12)
    second_prob = math.log(2 / 12)
    third_prob = math.log(5 / 5)

    # Act
    actual = ngram.calculate_log_probabilities()

    # Assert: the logarithms are computed floats, so compare with a
    # tolerance rather than bit-for-bit; exact equality would tie the test
    # to one particular order of floating-point operations.
    log_probabilities = ngram.n_gram_log_probabilities
    self.assertAlmostEqual(log_probabilities[(1, 2)], first_prob)
    self.assertAlmostEqual(log_probabilities[(1, 3)], second_prob)
    self.assertAlmostEqual(log_probabilities[(2, 5)], third_prob)
    self.assertEqual(0, actual)