def test_count_common_terms_English():
    """Check the number of terms shared by two English sentences.

    Both sentences go through the same pipeline (tokenize, stem, remove
    stopwords, de-duplicate into a set) before the common terms are counted.
    """
    lang = "english"
    sentences = (
        "Just a test sentence for the purpose of just testing common terms counting.",
        "This is just a sentence for tests purposes.",
    )

    # Run each stage over both sentences before moving on to the next stage.
    tokenized = [text.tokenize(sentence) for sentence in sentences]
    stemmed = [text.get_stems(tokens, lang) for tokens in tokenized]
    term_sets = [set(text.remove_stopwords(stems, lang)) for stems in stemmed]

    first_terms, second_terms = term_sets
    shared_count = text.count_common_terms(first_terms, second_terms)

    # Expected overlap, per the original author's note: the terms derived
    # from "sentence", "purpose(s)" and "test(s)".
    nose.tools.eq_(shared_count, 3)
def test_remove_stopwords():
    """Check that stopwords are dropped from a list of English words."""
    words = ["The", "car", "is", "going", "to", "crash", "or", "going", "to", "win"]

    # NOTE(review): the third positional argument (3) is presumably a
    # minimum word length -- confirm against text.remove_stopwords.
    filtered = text.remove_stopwords(words, "english", 3)

    nose.tools.eq_(filtered, ["The", "car", "going", "crash", "going", "win"])