def test_top_n_grams():
    """Check the top-2 list that top_n_grams builds from a small count table."""
    frequencies = NgramFrequencies()
    # "COUNT" carries the total (10) alongside the per-gram counts.
    sample_counts = {
        "COUNT": 10,
        "time_burton's": 5,
        "burton's_corpse": 4,
        "corpse_bride": 1,
    }
    expected_top_two = [("time_burton's", 0.5), ("burton's_corpse", 0.4)]

    actual_top_two = frequencies.top_n_grams(sample_counts, 2)

    assert actual_top_two == expected_top_two
def main(file_name):
    """Given the file name, print n-grams frequencies.

    String -> None

    Reads the file through TextCleaner, pre-processes every line, feeds the
    resulting word lists into NgramFrequencies, and then prints the top 10
    unigrams, bigrams and trigrams, each under its own label.

    Args:
        file_name: name of the text file, passed on to TextCleaner.read_file.
    """
    text = TextCleaner()
    ngrams = NgramFrequencies()
    text.read_file(file_name)

    # Pre-process every line that read_file collected.
    for line in text.lines:
        text.pre_process(line)

    # NOTE(review): the collapsed source does not show whether this loop was
    # nested inside the one above; it is treated as sequential here so each
    # word list is counted once -- confirm against the original layout.
    for word_per_list in text.word_list:
        ngrams.fill_in_dic(word_per_list)

    # Keep each label next to its dictionary so the printed heading always
    # matches the data (the old code always printed the first label).
    labelled_dics = (
        ("unigrams", ngrams.unigrams_dic),
        ("bigrams", ngrams.bigrams_dic),
        ("trigrams", ngrams.trigrams_dic),
    )
    for grams_name, grams_dic in labelled_dics:
        grams_top = ngrams.top_n_grams(grams_dic, 10)
        print_output(grams_top, grams_name)