def test_filter_terms_by_ic_max_n_terms(self):
    """Cap the number of kept terms at three.

    Calls ``vsm.filter_terms_by_ic`` on the fixture matrix and
    ``id_to_term`` with ``min_ic=0.0`` and ``max_n_terms=3``, then expects
    a result of shape ``(8, 3)`` and a returned ``id_to_term`` of length 3.
    """
    term_limit = 3
    filtered_matrix, filtered_id_to_term = vsm.filter_terms_by_ic(
        self.doc_term_matrix,
        self.id_to_term,
        min_ic=0.0,
        max_n_terms=term_limit,
    )
    # NOTE(review): the 8 rows presumably match the number of fixture
    # documents -- confirm against the test case's setUp.
    self.assertEqual(filtered_matrix.shape, (8, term_limit))
    self.assertEqual(len(filtered_id_to_term), term_limit)
def test_filter_terms_by_ic_max_n_terms(self):
    """Cap the number of kept terms at three.

    Calls ``vsm.filter_terms_by_ic`` on the fixture matrix and the
    vectorizer's ``vocabulary`` with ``min_ic=0.0`` and ``max_n_terms=3``,
    then expects a result of shape ``(8, 3)`` and a returned vocabulary of
    length 3.
    """
    term_limit = 3
    filtered_matrix, filtered_vocab = vsm.filter_terms_by_ic(
        self.doc_term_matrix,
        self.vectorizer.vocabulary,
        min_ic=0.0,
        max_n_terms=term_limit,
    )
    # NOTE(review): the 8 rows presumably match the number of fixture
    # documents -- confirm against the test case's setUp.
    self.assertEqual(filtered_matrix.shape, (8, term_limit))
    self.assertEqual(len(filtered_vocab), term_limit)
def test_filter_terms_by_ic_identity(self):
    """Expect a no-op when no filtering constraints are active.

    With ``min_ic=0.0`` and ``max_n_terms=None`` the test requires the
    returned matrix to keep the fixture matrix's shape and the returned
    ``id_to_term`` to compare equal to the fixture's.
    """
    unfiltered_matrix, unfiltered_id_to_term = vsm.filter_terms_by_ic(
        self.doc_term_matrix,
        self.id_to_term,
        min_ic=0.0,
        max_n_terms=None,
    )
    # Only the shape is compared for the matrix; its contents are not checked.
    self.assertEqual(unfiltered_matrix.shape, self.doc_term_matrix.shape)
    self.assertEqual(unfiltered_id_to_term, self.id_to_term)
def test_filter_terms_by_ic_identity(self):
    """Expect a no-op when no filtering constraints are active.

    With ``min_ic=0.0`` and ``max_n_terms=None`` the test requires the
    returned matrix to keep the fixture matrix's shape and the returned
    vocabulary to compare equal to the vectorizer's ``vocabulary``.
    """
    unfiltered_matrix, unfiltered_vocab = vsm.filter_terms_by_ic(
        self.doc_term_matrix,
        self.vectorizer.vocabulary,
        min_ic=0.0,
        max_n_terms=None,
    )
    # Only the shape is compared for the matrix; its contents are not checked.
    self.assertEqual(unfiltered_matrix.shape, self.doc_term_matrix.shape)
    self.assertEqual(unfiltered_vocab, self.vectorizer.vocabulary)
def test_filter_terms_by_ic_max_n_terms(vectorizer_and_dtm):
    """Cap the number of kept terms at three.

    Unpacks the ``vectorizer_and_dtm`` fixture into a vectorizer and a
    matrix, calls ``vsm.filter_terms_by_ic`` with ``min_ic=0.0`` and
    ``max_n_terms=3``, and expects a result of shape ``(8, 3)`` with a
    returned vocabulary of length 3.
    """
    fitted_vectorizer, full_matrix = vectorizer_and_dtm
    term_limit = 3
    filtered_matrix, filtered_vocab = vsm.filter_terms_by_ic(
        full_matrix,
        fitted_vectorizer.vocabulary_terms,
        min_ic=0.0,
        max_n_terms=term_limit,
    )
    # NOTE(review): the 8 rows presumably match the number of documents
    # behind the fixture -- confirm against the fixture definition.
    assert filtered_matrix.shape == (8, term_limit)
    assert len(filtered_vocab) == term_limit
def test_filter_terms_by_ic_identity(vectorizer_and_dtm):
    """Expect a no-op when no filtering constraints are active.

    Unpacks the ``vectorizer_and_dtm`` fixture, calls
    ``vsm.filter_terms_by_ic`` with ``min_ic=0.0`` and
    ``max_n_terms=None``, and requires the returned matrix to keep the
    input's shape and the returned vocabulary to compare equal to the
    vectorizer's ``vocabulary_terms``.
    """
    fitted_vectorizer, full_matrix = vectorizer_and_dtm
    unfiltered_matrix, unfiltered_vocab = vsm.filter_terms_by_ic(
        full_matrix,
        fitted_vectorizer.vocabulary_terms,
        min_ic=0.0,
        max_n_terms=None,
    )
    # Only the shape is compared for the matrix; its contents are not checked.
    assert unfiltered_matrix.shape == full_matrix.shape
    assert unfiltered_vocab == fitted_vectorizer.vocabulary_terms