Пример #1
0
def test_top_n_grams():
    """Check the list returned by ``top_n_grams`` for n = 2.

    The input dictionary carries a "COUNT" entry of 10 next to three
    n-gram counts (5, 4 and 1).  The call is expected to yield the two
    largest entries as ``(n-gram, value)`` pairs with values 0.5 and 0.4,
    i.e. presumably each count divided by the "COUNT" entry.
    """
    expected = [("time_burton's", 0.5), ("burton's_corpse", 0.4)]
    sample_counts = {
        "COUNT": 10,
        "time_burton's": 5,
        "burton's_corpse": 4,
        "corpse_bride": 1,
    }

    actual = NgramFrequencies().top_n_grams(sample_counts, 2)

    assert actual == expected
Пример #2
0
def main(file_name):
    """Print the top n-gram frequencies for the text stored in a file.

    The file is read through a ``TextCleaner``; every line it exposes is
    passed to ``pre_process``, each entry of ``text.word_list`` is fed to
    ``NgramFrequencies.fill_in_dic``, and finally the ten top entries of the
    unigram, bigram and trigram dictionaries are handed to ``print_output``
    together with the matching label.

    String -> None
    """
    text = TextCleaner()
    ngrams = NgramFrequencies()
    text.read_file(file_name)
    for line in text.lines:
        # presumably pre_process populates text.word_list -- confirm in TextCleaner
        text.pre_process(line)

    for word_per_list in text.word_list:
        ngrams.fill_in_dic(word_per_list)

    # Keep each dictionary next to its own label so every result set is
    # printed under the right heading (previously all three were labelled
    # "unigrams" because the label index was fixed at 0).
    labelled_dics = (
        ("unigrams", ngrams.unigrams_dic),
        ("bigrams", ngrams.bigrams_dic),
        ("trigrams", ngrams.trigrams_dic),
    )
    for grams_name, grams_dic in labelled_dics:
        grams_top = ngrams.top_n_grams(grams_dic, 10)
        print_output(grams_top, grams_name)