Пример #1
0
def main(filename):
    """Print the most frequent unigrams, bigrams and trigrams of a text file.

    The file is read through ``TextCleaner(filename).read_file()``, whose
    result is iterated as a sequence of sentence strings. Each sentence is
    split on whitespace and every word, adjacent word pair and adjacent word
    triple is fed to its own ``NgramFrequencies`` counter. N-grams never span
    a sentence boundary. Multi-word n-grams are joined with ``"_"``.

    Args:
        filename: Name of the text file to analyse. It is passed straight to
            ``TextCleaner``.
    """
    # NOTE(review): RANK is handed to NgramFrequencies and the results are
    # printed as "Top 10", so it is presumably the top-n size -- confirm
    # against the NgramFrequencies constructor.
    RANK = 10

    # Use the caller-supplied file instead of a hard-coded file name.
    tc = TextCleaner(filename)
    list_of_sentences = tc.read_file()

    unigram = NgramFrequencies(RANK)
    bigram = NgramFrequencies(RANK)
    trigram = NgramFrequencies(RANK)

    for sentence in list_of_sentences:
        words = sentence.split()
        last_index = len(words) - 1
        for i, word in enumerate(words):
            unigram.add_item(word)
            # A bigram needs one following word, a trigram needs two.
            if i < last_index:
                bigram.add_item(word + "_" + words[i + 1])
            if i < last_index - 1:
                trigram.add_item(word + "_" + words[i + 1] + "_" +
                                 words[i + 2])

    # Headings are derived from RANK so they stay in sync with the counters.
    print("Top {} unigrams:".format(RANK))
    print(unigram.top_n_freqs())
    print("Top {} bigrams:".format(RANK))
    print(bigram.top_n_freqs())
    print("Top {} trigrams:".format(RANK))
    print(trigram.top_n_freqs())
Пример #2
0
def test_top_n_freqs():
    """Check that top_n_freqs(3) lists n-grams from most to least frequent.

    The counter's ``dic_ngram`` mapping and ``total_count`` attribute are
    filled in by hand, so the test does not depend on reading any file.
    """
    ngram_freq = NgramFrequencies(1, '')

    # Seed the raw counts directly (1 + 7 + 12 == 20).
    seeded_counts = (("ab", 1), ("cd", 7), ("ef", 12))
    for ngram, count in seeded_counts:
        ngram_freq.dic_ngram[ngram] = count
    ngram_freq.total_count = 20

    list_top_freq = ngram_freq.top_n_freqs(3)

    # Only the n-gram (first element of each entry) is checked, in rank order.
    expected_order = ("ef", "cd", "ab")
    for position, expected_ngram in enumerate(expected_order):
        assert list_top_freq[position][0] == expected_ngram
def main():
    """Ask for a file name and print its top 10 unigrams, bigrams, trigrams.

    Three ``NgramFrequencies`` objects are created for the same file with a
    first argument of 1, 2 and 3 (presumably the n-gram size -- confirm
    against the class). For each one a heading is printed, ``make_ngram()``
    is called, and the entries of ``top_n_freqs(10)`` are printed one per
    line, indented by four spaces.
    """
    file_name = input('enter file name: ')

    # All three counters are constructed up front, before anything is printed.
    sections = [
        ("\n Top 10 unigrams:", NgramFrequencies(1, file_name)),
        ("\n Top 10 bigrams:", NgramFrequencies(2, file_name)),
        ("\n Top 10 trigrams:", NgramFrequencies(3, file_name)),
    ]

    for heading, counter in sections:
        print(heading)
        counter.make_ngram()
        # The leading "" makes the separator also precede the first entry.
        print("", *counter.top_n_freqs(10), sep="\n    ")
Пример #4
0
def test_ngram_frequencies():
    """Exercise add_item, frequency, top_n_counts and top_n_freqs together.

    Ten items are added in total: "He_is" three times, "I_am" six times and
    "They_are" once. ``add_item`` is expected to return the running count of
    the item just added.
    """
    nf = NgramFrequencies(2)

    additions = (("He_is", 3), ("I_am", 6), ("They_are", 1))
    for ngram, repetitions in additions:
        for running_count in range(1, repetitions + 1):
            assert nf.add_item(ngram) == running_count

    # 1 occurrence out of 10 items.
    assert nf.frequency("They_are") == 0.1

    # Only the two most frequent n-grams are expected back.
    expected_counts = [("I_am", 6), ("He_is", 3)]
    expected_freqs = [("I_am", 0.6), ("He_is", 0.3)]
    assert nf.top_n_counts() == expected_counts
    assert nf.top_n_freqs() == expected_freqs