def test_ngrams():
    """Check ``nvtext.ngrams`` for bigrams and trigrams.

    Two strings are copied to the device, tokenized, and combined into
    n-grams using a custom separator. The expected lists include n-grams
    that straddle the two input strings (e.g. ``favorite_book``).
    """
    sentences = ("this is my favorite", "book on my bookshelf")

    # Each case: (n-gram size, separator, expected host-side result).
    cases = (
        (
            2,
            "_",
            [
                "this_is",
                "is_my",
                "my_favorite",
                "favorite_book",
                "book_on",
                "on_my",
                "my_bookshelf",
            ],
        ),
        (
            3,
            "-",
            [
                "this-is-my",
                "is-my-favorite",
                "my-favorite-book",
                "favorite-book-on",
                "book-on-my",
                "on-my-bookshelf",
            ],
        ),
    )

    for size, joiner, wanted in cases:
        # Build a fresh list and fresh device objects for every case so the
        # two checks do not share any state.
        device_strings = nvstrings.to_device(list(sentences))
        token_strings = nvtext.tokenize(device_strings)
        grams = nvtext.ngrams(token_strings, N=size, sep=joiner)
        assert grams.to_host() == wanted
def test_ngrams():
    """Verify bigram and trigram generation via ``nvtext.ngrams``.

    The same pair of input strings is tokenized for each check; bigrams are
    joined with ``'_'`` and trigrams with ``'-'``, and the host-side result
    is compared against a hand-written list.
    """

    def host_ngrams(size, joiner):
        # Run the full pipeline from scratch: host list -> device strings
        # -> tokens -> n-grams -> host list.
        source = ['this is my favorite', 'book on my bookshelf']
        on_device = nvstrings.to_device(source)
        words = nvtext.tokenize(on_device)
        return nvtext.ngrams(words, N=size, sep=joiner).to_host()

    # bigrams
    bigrams_expected = [
        'this_is',
        'is_my',
        'my_favorite',
        'favorite_book',
        'book_on',
        'on_my',
        'my_bookshelf',
    ]
    assert host_ngrams(2, '_') == bigrams_expected

    # trigrams
    trigrams_expected = [
        'this-is-my',
        'is-my-favorite',
        'my-favorite-book',
        'favorite-book-on',
        'book-on-my',
        'on-my-bookshelf',
    ]
    assert host_ngrams(3, '-') == trigrams_expected