def test_arguments(self):
    """Check that CountVectorizer rejects negative constructor arguments.

    Constructing the vectorizer with ``max_doc_freq=-1`` or with
    ``max_features=-1`` is expected to raise ``ValueError``.

    Raises:
        AssertionError: if either construction succeeds without raising
            ``ValueError``.
    """
    for bad_kwargs in ({"max_doc_freq": -1}, {"max_features": -1}):
        try:
            CountVectorizer(**bad_kwargs)
        except ValueError:
            # Expected outcome: the invalid argument was rejected.
            continue
        # No exception was raised. Fail explicitly, otherwise the test
        # would pass vacuously. A raise (not a bare assert) survives -O.
        raise AssertionError(
            "CountVectorizer(**{!r}) did not raise ValueError".format(bad_kwargs)
        )
def test_transform(self):
    """Check ``transform`` output and its agreement with ``fit_transform``.

    A vectorizer limited to a single feature (``max_features=1``) is fitted
    on ``self.docs``; the matrix returned by ``transform`` must equal
    ``[[0], [2], [0]]``. With ``binary`` switched on, ``fit_transform`` and
    a subsequent ``transform`` on the same documents must produce equal
    matrices.
    """
    cv = CountVectorizer(max_doc_freq=2, min_freq=1, max_features=1)
    cv.fit(self.docs)

    # transform returns a pair; only the matrix (second item) is checked.
    _, count_matrix = cv.transform(self.docs)
    dense_counts = count_matrix.A
    expected_counts = np.asarray([[0], [2], [0]])
    npt.assert_array_equal(dense_counts, expected_counts)

    # Switch to binary mode on the same instance and compare both code paths.
    cv.binary = True
    _, refit_matrix = cv.fit_transform(self.docs)
    _, reused_matrix = cv.transform(self.docs)
    npt.assert_array_equal(refit_matrix.A, reused_matrix.A)
def test_with_special_tokens(self):
    """Check ``transform`` after swapping in a vocabulary with special tokens.

    The vectorizer is fitted on ``self.docs``, then its vocabulary is
    replaced by a ``Vocabulary`` rebuilt from the fitted ``idx2tok`` with
    ``use_special_tokens=True``. The matrix returned by ``transform`` must
    still equal ``[[0], [2], [0]]``.
    """
    cv = CountVectorizer(max_doc_freq=2, min_freq=1, max_features=1)
    cv.fit(self.docs)

    # Rebuild the vocabulary from the fitted token list, enabling special tokens.
    fitted_tokens = cv.vocab.idx2tok
    cv.vocab = Vocabulary(fitted_tokens, use_special_tokens=True)

    # transform returns a pair; only the matrix (second item) is checked.
    _, count_matrix = cv.transform(self.docs)
    dense_counts = count_matrix.A
    expected_counts = np.asarray([[0], [2], [0]])
    npt.assert_array_equal(dense_counts, expected_counts)
def test_bad_freq_arguments(self):
    """Check that ``fit`` rejects ``min_freq`` greater than ``max_doc_freq``.

    The vectorizer is built with ``max_doc_freq=2`` and ``min_freq=3``;
    calling ``fit`` on ``self.docs`` is expected to raise ``ValueError``.

    Raises:
        AssertionError: if ``fit`` completes without raising ``ValueError``.
    """
    vectorizer = CountVectorizer(max_doc_freq=2, min_freq=3)
    try:
        vectorizer.fit(self.docs)
    except ValueError:
        # Expected outcome: the inconsistent thresholds were rejected.
        return
    # No exception was raised. Fail explicitly, otherwise the test would
    # pass vacuously. A raise (not a bare assert) survives -O.
    raise AssertionError(
        "CountVectorizer.fit did not raise ValueError for "
        "max_doc_freq=2, min_freq=3"
    )