示例#1
0
def test_ngrams():
    """Check nvtext.ngrams for bigrams and trigrams over tokenized input.

    Each case pushes the same two sentences through nvstrings.to_device
    and nvtext.tokenize, then builds n-grams from the resulting tokens.
    The expected lists include entries such as "favorite_book", i.e. the
    n-grams span the boundary between the two input strings.
    """
    sentences = ("this is my favorite", "book on my bookshelf")
    cases = [
        # (n-gram size, separator, expected n-grams in order)
        (
            2,
            "_",
            [
                "this_is",
                "is_my",
                "my_favorite",
                "favorite_book",
                "book_on",
                "on_my",
                "my_bookshelf",
            ],
        ),
        (
            3,
            "-",
            [
                "this-is-my",
                "is-my-favorite",
                "my-favorite-book",
                "favorite-book-on",
                "book-on-my",
                "on-my-bookshelf",
            ],
        ),
    ]

    for size, separator, wanted in cases:
        # Build a fresh device strings object for every case, mirroring
        # the independent setup of the bigram and trigram checks.
        device_strings = nvstrings.to_device(list(sentences))
        token_strings = nvtext.tokenize(device_strings)
        result = nvtext.ngrams(token_strings, N=size, sep=separator)
        assert result.to_host() == wanted
示例#2
0
def test_ngrams():
    """Verify bigram and trigram generation via nvtext.ngrams.

    The two input sentences are converted with nvstrings.to_device,
    tokenized with nvtext.tokenize, and combined into n-grams joined by
    the given separator. Results are copied back with to_host() and
    compared against the full expected list, in order.
    """

    def host_ngrams(size, joiner):
        # Run the whole pipeline from scratch and return host strings.
        device_strings = nvstrings.to_device(
            ['this is my favorite', 'book on my bookshelf']
        )
        token_strings = nvtext.tokenize(device_strings)
        return nvtext.ngrams(token_strings, N=size, sep=joiner).to_host()

    # bigrams, joined with an underscore
    bigrams = [
        'this_is',
        'is_my',
        'my_favorite',
        'favorite_book',
        'book_on',
        'on_my',
        'my_bookshelf',
    ]
    assert host_ngrams(2, '_') == bigrams

    # trigrams, joined with a hyphen
    trigrams = [
        'this-is-my',
        'is-my-favorite',
        'my-favorite-book',
        'favorite-book-on',
        'book-on-my',
        'on-my-bookshelf',
    ]
    assert host_ngrams(3, '-') == trigrams