Example #1
import nose.tools

import text  # module under test; assumed importable as `text`


def test_count_common_terms_English():
    """ Tests common terms counting.
    """
    language = "english"
    text1 = "Just a test sentence for the purpose of just testing common terms counting."
    text2 = "This is just a sentence for tests purposes."
    text1_tokens = text.tokenize(text1)
    text2_tokens = text.tokenize(text2)
    text1_stems = text.get_stems(text1_tokens, language)
    text2_stems = text.get_stems(text2_tokens, language)
    text1_stems_no_stopwords = set(text.remove_stopwords(text1_stems, language))
    text2_stems_no_stopwords = set(text.remove_stopwords(text2_stems, language))
    nose.tools.eq_(text.count_common_terms(text1_stems_no_stopwords,
                                           text2_stems_no_stopwords),
                   3)  # sentence, purpose, tests
Example #2
import nose.tools

import text  # module under test; assumed importable as `text`


def test_tokenize():
    """ Tests tokenization.
    """
    actual = text.tokenize("The car is going to Mountain View. You! You "
                           "should go too... Or, maybe, shouldn't!?")
    expected = ["The", "car", "is", "going", "to", "Mountain", "View", "You",
                "You", "should", "go", "too", "Or", "maybe", "shouldn", "\'",
                "t"]

    nose.tools.eq_(actual, expected)
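
The `text` module that both examples exercise is not shown above. The sketch below is one plausible implementation of the four helpers the tests call, inferred from their usage and assuming NLTK's Snowball stemmer and stopword corpus; it is illustrative only, not the project's actual code.

import re

from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer


def tokenize(raw_text):
    # Keep alphanumeric runs and lone apostrophes as tokens, dropping other
    # punctuation and whitespace (so "shouldn't" -> "shouldn", "'", "t").
    return re.findall(r"[A-Za-z0-9]+|'", raw_text)


def get_stems(tokens, language):
    # Reduce each token to its stem, e.g. "testing" -> "test".
    stemmer = SnowballStemmer(language)
    return [stemmer.stem(token) for token in tokens]


def remove_stopwords(stems, language):
    # Drop very common words ("the", "is", "just", ...) before comparing texts.
    # Requires the NLTK stopwords corpus to be downloaded.
    stop = set(stopwords.words(language))
    return [stem for stem in stems if stem not in stop]


def count_common_terms(terms1, terms2):
    # The tests pass sets, so the shared-term count is a set intersection.
    return len(set(terms1) & set(terms2))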