def test_add_items():
    """Check that repeated add_item calls bump the n-gram's stored count.

    Adds "the" twice to the unigram dictionary and verifies its count after
    each call, then verifies the "COUNT" entry equals the number of additions.
    """
    counter = NgramFrequencies()
    token = "the"

    # A fresh instance must not know the token yet.
    assert token not in counter.unigrams_dic

    for expected in (1, 2):
        counter.add_item(token, counter.unigrams_dic)
        assert counter.unigrams_dic[token] == expected

    assert counter.unigrams_dic["COUNT"] == 2
def main(filename):
    """Print the most frequent unigrams, bigrams and trigrams of a text file.

    Args:
        filename: Path handed to ``TextCleaner``. Previously this argument
            was ignored and "corpse_bride.txt" was always read.

    The sentences come from ``TextCleaner.read_file()``; each one is split
    on whitespace and its words are joined with "_" to form the n-gram keys
    passed to ``NgramFrequencies.add_item``.
    """
    # NOTE(review): RANK is presumably the number of top entries each
    # NgramFrequencies instance reports -- confirm against the class.
    RANK = 10

    sentences = TextCleaner(filename).read_file()

    unigram = NgramFrequencies(RANK)
    bigram = NgramFrequencies(RANK)
    trigram = NgramFrequencies(RANK)

    for sentence in sentences:
        words = sentence.split()
        word_count = len(words)
        for i, word in enumerate(words):
            unigram.add_item(word)
            # Only emit an n-gram when enough words remain after position i.
            if i + 1 < word_count:
                bigram.add_item("_".join(words[i:i + 2]))
            if i + 2 < word_count:
                trigram.add_item("_".join(words[i:i + 3]))

    # Headers are derived from RANK so they cannot drift from the counters.
    report = (
        ("unigrams", unigram),
        ("bigrams", bigram),
        ("trigrams", trigram),
    )
    for label, counter in report:
        print(f"Top {RANK} {label}:")
        print(counter.top_n_freqs())
def test_ngram_frequencies():
    """Exercise add_item, frequency, top_n_counts and top_n_freqs together.

    Adds "He_is" three times, "I_am" six times and "They_are" once (ten
    items in total) to an instance built with ``NgramFrequencies(2)``, then
    checks the running counts returned by ``add_item``, the relative
    frequency of "They_are", and the two top-ranked entries.

    Frequencies are compared with ``math.isclose`` rather than ``==`` so the
    test does not depend on how the implementation rounds its division.
    """
    import math

    nf = NgramFrequencies(2)

    # add_item is expected to return the updated count for the item.
    for expected in range(1, 4):
        assert nf.add_item("He_is") == expected
    for expected in range(1, 7):
        assert nf.add_item("I_am") == expected
    assert nf.add_item("They_are") == 1

    # 1 occurrence out of 10 items added.
    assert math.isclose(nf.frequency("They_are"), 0.1)

    assert nf.top_n_counts() == [("I_am", 6), ("He_is", 3)]

    # Ranking order is checked exactly; the float values with a tolerance.
    top_freqs = nf.top_n_freqs()
    assert [ngram for ngram, _ in top_freqs] == ["I_am", "He_is"]
    for (_, freq), expected in zip(top_freqs, (0.6, 0.3)):
        assert math.isclose(freq, expected)
def test_add_item():
    """Verify that add_item stores a first occurrence with a count of one."""
    item = "ab"
    counter = NgramFrequencies(1, "")

    counter.add_item(item)

    assert counter.dic_ngram[item] == 1