Пример #1
0
def test_sort_top_terms():
    """Requesting one top term returns only the highest-count (term, count) pair."""
    term_counts = {'came': 1, 'saw': 1, 'conquered': 1, 'I': 3}

    top_one = analyzer.Analyzer().sort_top_terms(term_counts, 1)

    # A top-2 check is omitted: every remaining term has count 1, so which
    # one would rank second depends on tie-breaking that is not visible here.
    assert top_one == [('I', 3)]
Пример #2
0
def test_remove_no_stop_words():
    """Words are returned unchanged when none of them is in the stop list.

    The expected value is a separate literal rather than the list handed to
    ``remove_stop_words``: comparing the result with the very object that was
    passed in would still succeed if the method mutated its argument in place
    and returned it, hiding that kind of regression.
    """
    a = analyzer.Analyzer()
    input_words = ['a', 'fantastic', 'test']
    stopwords = ['foo']  # deliberately shares no entry with input_words

    result = a.remove_stop_words(input_words, stopwords)

    assert result == ['a', 'fantastic', 'test']
Пример #3
0
def test_remove_punctuation():
    """Punctuation-heavy text comes back as a list of bare words, case intact."""
    expected_words = [
        'I',
        'really',
        'should',
        'stop',
        'putting',
        'so',
        'many',
        'punctuation',
        'marks',
        'In',
        'my',
        'Sentences',
    ]
    noisy_text = 'I; really! should. stop:: putting??? so&*, many punctuation marks In my Sentences!!!&!$)'

    cleaned = analyzer.Analyzer().remove_punctuation(noisy_text)

    assert cleaned == expected_words
Пример #4
0
def test_compute_term_frequency():
    """Each distinct word maps to the number of times it occurs in the input."""
    tokens = ['a', 'man', 'a', 'plan', 'a', 'canal', 'Panama']
    expected_counts = {
        'a': 3,
        'man': 1,
        'plan': 1,
        'canal': 1,
        'Panama': 1,
    }

    frequencies = analyzer.Analyzer().compute_term_frequency(tokens)

    # Check term by term (not whole-mapping equality) so that only the
    # listed keys are constrained, in the same order as they are listed.
    for term, count in expected_counts.items():
        assert frequencies[term] == count
Пример #5
0
def test_stem():
    """All inflections of 'jump' are reduced to the same stem, 'jump'."""
    inflections = ['jumping', 'jumps', 'jumped', 'jump']

    stems = analyzer.Analyzer().stem(inflections)

    # One 'jump' per input word, order preserved.
    assert stems == ['jump'] * 4
Пример #6
0
from analyzer import analyzer

# Script driver: collect words and stop words, then report the top terms.
a = analyzer.Analyzer()
a.clear_screen()
a.greeting()

# Keep calling get_input_words() until it returns something other than
# an empty list.
while True:
    input_words = a.get_input_words()
    if input_words != []:
        break

# Same retry rule for the stop-word list.
while True:
    stopwords = a.get_stopwords()
    if stopwords != []:
        break

# Pipeline: drop stop words -> stem -> count -> keep the 20 top terms -> print.
filtered_words = a.remove_stop_words(input_words, stopwords)
root_words = a.stem(filtered_words)
frequency = a.compute_term_frequency(root_words)
common_terms = a.sort_top_terms(frequency, 20)
a.print_top_terms(common_terms)