def test_tokenize_method(self):
    """tokenize() defaults to WordTokenizer; a custom tokenizer may be passed."""
    tab_tok = nltk.tokenize.TabTokenizer()
    text = tb.TextBlob("This is\ttext.")
    # No argument: falls back to the default WordTokenizer.
    assert_equal(text.tokenize(), tb.WordList(["This", "is", "text", "."]))
    # Explicit TabTokenizer: splits only on tab characters.
    assert_equal(text.tokenize(tab_tok), tb.WordList(["This is", "text."]))
def test_slicing(self):
    """Indexing a WordList yields a Word; slicing yields a WordList."""
    word_list = tb.WordList(self.words)
    head = word_list[0]
    assert_true(isinstance(head, tb.Word))
    assert_equal(head, 'Beautiful')
    first_two = word_list[0:2]
    assert_true(isinstance(first_two, tb.WordList))
    assert_equal(first_two, tb.WordList(['Beautiful', 'is']))
def test_ngrams(self):
    """ngrams() defaults to trigrams; the window size n is configurable."""
    sentence = tb.TextBlob("I am eating a pizza.")
    trigrams = sentence.ngrams()
    expected_trigrams = [
        tb.WordList(('I', 'am', 'eating')),
        tb.WordList(('am', 'eating', 'a')),
        tb.WordList(('eating', 'a', 'pizza')),
    ]
    assert_equal(trigrams, expected_trigrams)
    four_grams = sentence.ngrams(n=4)
    expected_four_grams = [
        tb.WordList(('I', 'am', 'eating', 'a')),
        tb.WordList(('am', 'eating', 'a', 'pizza')),
    ]
    assert_equal(four_grams, expected_four_grams)
def test_join(self):
    """TextBlob.join accepts both a plain list and a WordList."""
    words = ['explicit', 'is', 'better']
    word_list = tb.WordList(words)
    separator = tb.TextBlob(' ')
    assert_equal(separator.join(words), tb.TextBlob('explicit is better'))
    assert_equal(separator.join(word_list), tb.TextBlob('explicit is better'))
def test_repr(self):
    """repr() shows u'' prefixes on Python 2 and plain strings on Python 3."""
    word_list = tb.WordList(['Beautiful', 'is', 'better'])
    expected = ("WordList([u'Beautiful', u'is', u'better'])" if PY2
                else "WordList(['Beautiful', 'is', 'better'])")
    assert_equal(repr(word_list), expected)
def test_overrides(self):
    """Blobs from one Blobber share its tokenizer yet remain distinct blobs."""
    factory = tb.Blobber(tokenizer=SentenceTokenizer(),
                         np_extractor=ConllExtractor())
    first = factory("How now? Brown cow?")
    assert_true(isinstance(first.tokenizer, SentenceTokenizer))
    assert_equal(first.tokens, tb.WordList(["How now?", "Brown cow?"]))
    second = factory("Another blob")
    # Both blobs hold the very same tokenizer instance...
    assert_true(first.tokenizer is second.tokenizer)
    # ...but the blobs themselves are not equal.
    assert_not_equal(first, second)
def test_words(self):
    """blob.words is a WordList of word tokens (punctuation dropped)."""
    two_sentences = tb.TextBlob('Beautiful is better than ugly. '
                                'Explicit is better than implicit.')
    assert_true(isinstance(two_sentences.words, tb.WordList))
    expected = tb.WordList([
        'Beautiful', 'is', 'better', 'than', 'ugly',
        'Explicit', 'is', 'better', 'than', 'implicit',
    ])
    assert_equal(two_sentences.words, expected)
    one_liner = tb.TextBlob("Just a bundle of words")
    assert_equal(one_liner.words,
                 tb.WordList(['Just', 'a', 'bundle', 'of', 'words']))
def test_count(self):
    """count() is case-insensitive by default and matches whole words only."""
    word_list = tb.WordList(['monty', 'python', 'Python', 'Monty'])
    assert_equal(word_list.count('monty'), 2)
    assert_equal(word_list.count('monty', case_sensitive=True), 1)
    # A substring is not a match.
    assert_equal(word_list.count('mon'), 0)
def test_strip_and_words(self):
    """strip() composes with .words (trailing whitespace removed first)."""
    padded = tb.TextBlob('Beautiful is better! ')
    assert_equal(padded.strip().words,
                 tb.WordList(['Beautiful', 'is', 'better']))
def test_upper(self):
    """upper() maps str.upper over every word in the list."""
    word_list = tb.WordList(self.words)
    expected = tb.WordList([word.upper() for word in self.words])
    assert_equal(word_list.upper(), expected)
def test_lower(self):
    """lower() lowercases each word regardless of its original casing."""
    mixed_case = tb.WordList(['Zen', 'oF', 'PYTHON'])
    assert_equal(mixed_case.lower(), tb.WordList(['zen', 'of', 'python']))
def test_can_use_an_different_tokenizer(self):
    """A TextBlob built with a custom tokenizer uses it for .tokens.

    NOTE: the method name keeps its original (ungrammatical) spelling so
    any external references to this test are not broken.
    """
    tab_tok = nltk.tokenize.TabTokenizer()
    tabbed = tb.TextBlob("This is\ttext.", tokenizer=tab_tok)
    assert_equal(tabbed.tokens, tb.WordList(["This is", "text."]))
def test_lemmatize(self):
    """lemmatize() reduces each word to its lemma (dogs -> dog, oxen -> ox)."""
    inflected = tb.WordList(["cat", "dogs", "oxen"])
    assert_equal(inflected.lemmatize(), tb.WordList(['cat', 'dog', 'ox']))
def test_tokens_property(self):
    """blob.tokens equals what WordTokenizer produces for the raw text.

    Bug fix: the original called ``assert_true(a, b)``, which treats the
    second argument as a failure *message* and therefore passes whenever
    ``a`` is truthy — the intended equality was never checked. Use
    ``assert_equal`` so the comparison is actually performed.
    """
    assert_equal(self.blob.tokens,
                 tb.WordList(WordTokenizer().tokenize(self.text)))
def test_pluralize(self):
    """pluralize() inflects each word, including irregular 'buffaloes'."""
    singulars = tb.WordList(['dog', 'cat', 'buffalo'])
    assert_equal(singulars.pluralize(),
                 tb.WordList(['dogs', 'cats', 'buffaloes']))
def test_extend(self):
    """extend() coerces strings to Word but leaves non-strings untouched."""
    word_list = tb.WordList(["cats", "dogs"])
    word_list.extend(["buffalo", 4])
    assert_true(isinstance(word_list[2], tb.Word))
    assert_true(isinstance(word_list[3], int))
def test_convert_to_list(self):
    """list(WordList) round-trips back to the original list of words."""
    word_list = tb.WordList(self.words)
    assert_equal(list(word_list), self.words)
def test_len(self):
    """len() reports the number of words in the list."""
    three_words = tb.WordList(['Beautiful', 'is', 'better'])
    assert_equal(len(three_words), 3)
def test_upper_and_words(self):
    """upper() composes with .words to yield uppercase tokens."""
    lowercase = tb.TextBlob('beautiful is better')
    assert_equal(lowercase.upper().words,
                 tb.WordList(['BEAUTIFUL', 'IS', 'BETTER']))
def test_split(self):
    """split() with no arguments splits on whitespace into a WordList."""
    phrase = tb.TextBlob('Beautiful is better')
    assert_equal(phrase.split(), tb.WordList(['Beautiful', 'is', 'better']))
def test_append(self):
    """append() coerces strings to Word but preserves other object types."""
    word_list = tb.WordList(['dog'])
    word_list.append("cat")
    assert_true(isinstance(word_list[1], tb.Word))
    word_list.append(('a', 'tuple'))
    assert_true(isinstance(word_list[2], tuple))
def test_singularize(self):
    """singularize() handles both regular and irregular plurals."""
    plurals = tb.WordList(['dogs', 'cats', 'buffaloes', 'men', 'mice'])
    assert_equal(plurals.singularize(),
                 tb.WordList(['dog', 'cat', 'buffalo', 'man', 'mouse']))