def test_count_common_terms_English():
    """
    Verifies the common-term count for two English sentences.

    Both sentences are tokenized, stemmed and stripped of stopwords; the
    resulting stem sets are then passed to ``text.count_common_terms``.
    """
    lang = "english"
    sentences = (
        "Just a test sentence for the purpose of just testing common terms counting.",
        "This is just a sentence for tests purposes.",
    )

    # Each stage is run for both sentences before moving on to the next one.
    tokenized = [text.tokenize(sentence) for sentence in sentences]
    stemmed = [text.get_stems(tokens, lang) for tokens in tokenized]
    first_terms, second_terms = [
        set(text.remove_stopwords(stems, lang)) for stems in stemmed
    ]

    # Three shared terms are expected: sentence, purpose, tests.
    common_count = text.count_common_terms(first_terms, second_terms)
    nose.tools.eq_(common_count, 3)


def test_tokenize():
    """
    Tests tokenization.

    Feeds ``text.tokenize`` a short passage with mixed punctuation and
    compares the result against the token list this test expects: sentence
    punctuation is absent and the apostrophe in "shouldn't" is a token of
    its own.
    """
    # Adjacent literals are joined at compile time. This keeps the line short
    # without placing a backslash (or continuation-line indentation) inside
    # the string that is handed to the tokenizer.
    sample = (
        "The car is going to Mountain View. You! You "
        "should go too... Or, maybe, shouldn't!?"
    )
    actual = text.tokenize(sample)
    expected = [
        "The", "car", "is", "going", "to", "Mountain", "View",
        "You", "You", "should", "go", "too",
        "Or", "maybe", "shouldn", "'", "t",
    ]
    nose.tools.eq_(actual, expected)