def test_top_n_freqs():
    """Check that top_n_freqs(3) lists n-grams from highest to lowest count."""
    tracker = NgramFrequencies(1, '')
    # Seed the table directly so the test does not depend on reading a file.
    for gram, count in (("ab", 1), ("cd", 7), ("ef", 12)):
        tracker.dic_ngram[gram] = count
    tracker.total_count = 20

    ranked = tracker.top_n_freqs(3)

    # Only the n-gram (first element of each entry) is checked, not its value.
    first, second, third = ranked[0], ranked[1], ranked[2]
    assert first[0] == "ef"
    assert second[0] == "cd"
    assert third[0] == "ab"
def test_fill_in_dic():
    """Verify fill_in_dic populates the unigram, bigram and trigram tables."""
    tokens = ["time", "burton's", "corpse", "bride"]

    # Each table is expected to hold per-n-gram counts plus a "COUNT" total.
    expected_unigrams = {
        "time": 1,
        "burton's": 1,
        "corpse": 1,
        "bride": 1,
        "COUNT": 4,
    }
    expected_bigrams = {
        "time_burton's": 1,
        "burton's_corpse": 1,
        "corpse_bride": 1,
        "COUNT": 3,
    }
    expected_trigrams = {
        "time_burton's_corpse": 1,
        "burton's_corpse_bride": 1,
        "COUNT": 2,
    }

    counter = NgramFrequencies()
    counter.fill_in_dic(tokens)

    assert counter.unigrams_dic == expected_unigrams
    assert counter.bigrams_dic == expected_bigrams
    assert counter.trigrams_dic == expected_trigrams
def test_add_items():
    """Verify add_item creates an entry, then increments it and "COUNT"."""
    counter = NgramFrequencies()
    assert "the" not in counter.unigrams_dic

    # Adding the same n-gram twice should yield counts 1 and then 2.
    for expected in (1, 2):
        counter.add_item("the", counter.unigrams_dic)
        assert counter.unigrams_dic["the"] == expected

    assert counter.unigrams_dic["COUNT"] == 2
def test_top_n_counts():
    """Check that top_n_counts(3) lists n-grams from highest to lowest count."""
    tracker = NgramFrequencies(1, '')
    # Seed the table directly so the test does not depend on reading a file.
    for gram, count in (("ab", 3), ("cd", 2), ("ef", 6)):
        tracker.dic_ngram[gram] = count

    ranked = tracker.top_n_counts(3)

    # Only the n-gram (first element of each entry) is checked, not its count.
    first, second, third = ranked[0], ranked[1], ranked[2]
    assert first[0] == "ef"
    assert second[0] == "ab"
    assert third[0] == "cd"
def main(filename):
    """Print the most frequent unigrams, bigrams and trigrams of a text file.

    The file is read through ``TextCleaner``; every sentence it returns is
    split on whitespace and its n-grams (words joined with ``_``) are fed
    into three separate ``NgramFrequencies`` counters.

    Args:
        filename: Path of the text file to analyse. Previously this argument
            was ignored and "corpse_bride.txt" was always read.
    """
    rank = 10  # how many top entries each counter is asked to keep

    cleaner = TextCleaner(filename)
    sentences = cleaner.read_file()

    unigram = NgramFrequencies(rank)
    bigram = NgramFrequencies(rank)
    trigram = NgramFrequencies(rank)

    for sentence in sentences:
        words = sentence.split()
        last = len(words) - 1
        for i, word in enumerate(words):
            unigram.add_item(word)
            # A bigram needs one following word, a trigram needs two.
            if i < last:
                bigram.add_item("_".join(words[i:i + 2]))
            if i < last - 1:
                trigram.add_item("_".join(words[i:i + 3]))

    print(f"Top {rank} unigrams:")
    print(unigram.top_n_freqs())
    print(f"Top {rank} bigrams:")
    print(bigram.top_n_freqs())
    print(f"Top {rank} trigrams:")
    print(trigram.top_n_freqs())
def test_ngram_frequencies():
    """Exercise add_item, frequency, top_n_counts and top_n_freqs together.

    Ten items are added in total (3 + 6 + 1), so the expected frequencies
    are the counts divided by ten. The counter is built with 2 and the
    top-n results are expected to contain only the two most common n-grams.
    """
    nf = NgramFrequencies(2)

    # add_item is expected to return the running count for the added n-gram.
    for gram, repeats in (("He_is", 3), ("I_am", 6), ("They_are", 1)):
        for running_count in range(1, repeats + 1):
            assert nf.add_item(gram) == running_count

    assert nf.frequency("They_are") == 0.1

    expected_counts = [("I_am", 6), ("He_is", 3)]
    expected_freqs = [("I_am", 0.6), ("He_is", 0.3)]
    assert nf.top_n_counts() == expected_counts
    assert nf.top_n_freqs() == expected_freqs
def main():
    """Prompt for a file name and print its top 10 uni-, bi- and trigrams."""
    file_name = input('enter file name: ')

    # Build all three counters up front, in order of n-gram size.
    models = [NgramFrequencies(order, file_name) for order in (1, 2, 3)]
    headings = (
        "\n Top 10 unigrams:",
        "\n Top 10 bigrams:",
        "\n Top 10 trigrams:",
    )

    for heading, model in zip(headings, models):
        print(heading)
        model.make_ngram()
        # The leading "" makes every entry start on its own line after sep.
        print("", *model.top_n_freqs(10), sep="\n ")
def test_constructor():
    """Check the initial state of a freshly constructed NgramFrequencies."""
    fresh = NgramFrequencies(2, '')

    # The first constructor argument should be stored as N.
    assert fresh.N == 2
    # Nothing has been counted yet.
    assert fresh.total_count == 0
    assert fresh.dic_ngram == {}
def test_add_item():
    """Check that adding a new n-gram once records a count of 1."""
    tracker = NgramFrequencies(1, '')
    gram = 'ab'

    tracker.add_item(gram)

    assert tracker.dic_ngram[gram] == 1
def test_frequency():
    """Check that frequency(2, 10) evaluates to 0.2."""
    counter = NgramFrequencies()
    assert counter.frequency(2, 10) == 0.2
def test_top_n_freqs():
    """Check that top_n_freq turns (item, count) pairs into frequencies.

    With a total of 10, each count is expected to become count / 10 while
    the order of the input list is kept.
    """
    counter = NgramFrequencies()
    ranked_counts = [("d", 4), ("c", 3), ("b", 2), ("a", 1)]
    expected = [("d", 0.4), ("c", 0.3), ("b", 0.2), ("a", 0.1)]

    assert counter.top_n_freq(ranked_counts, 10) == expected
def test_constructor():
    """Check that every n-gram table starts with a "COUNT" of zero."""
    counter = NgramFrequencies()
    tables = (counter.unigrams_dic, counter.bigrams_dic, counter.trigrams_dic)

    for table in tables:
        assert table["COUNT"] == 0
def test_top_n_counts():
    """Check that top_n_counts returns (item, count) pairs, largest first."""
    counter = NgramFrequencies()
    counts = {"a": 1, "b": 2, "c": 3, "d": 4}
    expected = [("d", 4), ("c", 3), ("b", 2), ("a", 1)]

    assert counter.top_n_counts(counts) == expected