示例#1
0
def test_top_n_freqs():
    '''Check that top_n_freqs(3) lists the n-grams from highest to lowest count.'''
    freqs = NgramFrequencies(1, '')
    # Seed the counter directly so the test does not depend on add_item.
    for ngram, count in (("ab", 1), ("cd", 7), ("ef", 12)):
        freqs.dic_ngram[ngram] = count
    freqs.total_count = 20
    ranked = freqs.top_n_freqs(3)
    # Only the n-gram (first element of each entry) is verified.
    for position, expected in enumerate(("ef", "cd", "ab")):
        assert ranked[position][0] == expected
示例#2
0
def test_fill_in_dic():
    """Check the unigram, bigram and trigram dictionaries after one fill."""
    tokens = ["time", "burton's", "corpse", "bride"]

    # Each dictionary also carries a running total under the "COUNT" key.
    expected_unigrams = {
        "COUNT": 4,
        "time": 1,
        "burton's": 1,
        "corpse": 1,
        "bride": 1,
    }
    expected_bigrams = {
        "COUNT": 3,
        "time_burton's": 1,
        "burton's_corpse": 1,
        "corpse_bride": 1,
    }
    expected_trigrams = {
        "COUNT": 2,
        "time_burton's_corpse": 1,
        "burton's_corpse_bride": 1,
    }

    ngrams = NgramFrequencies()
    ngrams.fill_in_dic(tokens)

    assert ngrams.unigrams_dic == expected_unigrams
    assert ngrams.bigrams_dic == expected_bigrams
    assert ngrams.trigrams_dic == expected_trigrams
示例#3
0
def test_add_items():
    """Check that adding the same unigram twice bumps its count and the total."""
    ngrams = NgramFrequencies()
    assert "the" not in ngrams.unigrams_dic
    # After the k-th insertion the stored count for "the" must be k.
    for expected_count in (1, 2):
        ngrams.add_item("the", ngrams.unigrams_dic)
        assert ngrams.unigrams_dic["the"] == expected_count
    assert ngrams.unigrams_dic["COUNT"] == 2
示例#4
0
def test_top_n_counts():
    '''Check that top_n_counts(3) lists the n-grams from highest to lowest count.'''
    counter = NgramFrequencies(1, '')
    # Seed the counter directly with known counts.
    for ngram, count in (("ab", 3), ("cd", 2), ("ef", 6)):
        counter.dic_ngram[ngram] = count
    ranked = counter.top_n_counts(3)
    # Only the n-gram (first element of each entry) is verified.
    for position, expected in enumerate(("ef", "ab", "cd")):
        assert ranked[position][0] == expected
示例#5
0
def main(filename):
    """Print the top-ranked unigrams, bigrams and trigrams of a text file.

    The file is read through ``TextCleaner``; every sentence it yields is
    split on whitespace and each run of one, two and three consecutive words
    is counted in its own ``NgramFrequencies`` instance.

    Args:
        filename: Path of the text file, handed to ``TextCleaner``.
    """
    # Read the file the caller asked for rather than a fixed path.
    tc = TextCleaner(filename)
    # presumably an iterable of sentence strings -- confirm against TextCleaner
    list_of_sentences = tc.read_file()

    RANK = 10

    unigram = NgramFrequencies(RANK)
    bigram = NgramFrequencies(RANK)
    trigram = NgramFrequencies(RANK)

    for sentence in list_of_sentences:
        words = sentence.split()
        last = len(words) - 1
        for i, word in enumerate(words):
            unigram.add_item(word)
            # A bigram needs one following word, a trigram two.
            if i < last:
                bigram.add_item("_".join(words[i:i + 2]))
            if i < last - 1:
                trigram.add_item("_".join(words[i:i + 3]))

    print("Top 10 unigrams:")
    print(unigram.top_n_freqs())
    print("Top 10 bigrams:")
    print(bigram.top_n_freqs())
    print("Top 10 trigrams:")
    print(trigram.top_n_freqs())
示例#6
0
def test_ngram_frequencies():
    """Exercise add_item, frequency, top_n_counts and top_n_freqs together."""
    nf = NgramFrequencies(2)

    # add_item is expected to return the running count of the n-gram it
    # was just given; ten items are added in total.
    for ngram, repeats in (("He_is", 3), ("I_am", 6), ("They_are", 1)):
        for expected_count in range(1, repeats + 1):
            assert nf.add_item(ngram) == expected_count

    assert nf.frequency("They_are") == 0.1

    expected_counts = [("I_am", 6), ("He_is", 3)]
    expected_freqs = [("I_am", 0.6), ("He_is", 0.3)]
    assert nf.top_n_counts() == expected_counts
    assert nf.top_n_freqs() == expected_freqs
def main():
    """Prompt for a file name and print its top 10 uni-, bi- and trigrams."""
    file_name = input('enter file name: ')

    # All three counters are created before any output is produced.
    counters = [NgramFrequencies(size, file_name) for size in (1, 2, 3)]
    headings = (
        "\n Top 10 unigrams:",
        "\n Top 10 bigrams:",
        "\n Top 10 trigrams:",
    )

    for heading, counter in zip(headings, counters):
        print(heading)
        counter.make_ngram()
        # The leading "" makes the separator indent the first entry as well.
        print("", *counter.top_n_freqs(10), sep="\n    ")
示例#8
0
def test_constructor():
    '''Check the initial state of a freshly built NgramFrequencies(2, "").'''
    fresh = NgramFrequencies(2, '')
    # Nothing counted yet: empty table and zero total.
    assert fresh.dic_ngram == {}
    assert fresh.total_count == 0
    # The first constructor argument is stored as N.
    assert fresh.N == 2
示例#9
0
def test_add_item():
    '''Check that a first add_item call records the n-gram with a count of 1.'''
    counter = NgramFrequencies(1, '')
    counter.add_item('ab')
    recorded = counter.dic_ngram['ab']
    assert recorded == 1
示例#10
0
def test_frequency():
    """Check that frequency(2, 10) evaluates to 0.2."""
    ngrams = NgramFrequencies()
    assert ngrams.frequency(2, 10) == 0.2
示例#11
0
def test_top_n_freqs():
    """Check that top_n_freq turns (item, count) pairs into (item, frequency)."""
    ngrams = NgramFrequencies()
    counted = [("d", 4), ("c", 3), ("b", 2), ("a", 1)]
    expected = [("d", 0.4), ("c", 0.3), ("b", 0.2), ("a", 0.1)]
    # The second argument is 10, the sum of the counts above.
    assert ngrams.top_n_freq(counted, 10) == expected
示例#12
0
def test_constructor():
    """Check that every n-gram dictionary starts with a "COUNT" of zero."""
    ngrams = NgramFrequencies()
    for attribute in ("unigrams_dic", "bigrams_dic", "trigrams_dic"):
        assert getattr(ngrams, attribute)["COUNT"] == 0
示例#13
0
def test_top_n_counts():
    """Check that top_n_counts returns (item, count) pairs, largest count first."""
    ngrams = NgramFrequencies()
    counts = {"a": 1, "b": 2, "c": 3, "d": 4}
    expected = [("d", 4), ("c", 3), ("b", 2), ("a", 1)]
    assert ngrams.top_n_counts(counts) == expected