def test_prepare_cv(self):
    """Check that ``prepare_cv`` records the requested number of splits.

    Builds a ``ClassifierCv`` from ``self.labels`` and ``self.texts``,
    attaches a three-step pipeline, calls ``prepare_cv(3)`` and then
    asserts that ``kf.n_splits`` equals 3 and that ``unique_labels`` is
    not ``None``.
    """
    n_folds = 3

    classifier = ClassifierCv(self.labels, self.texts)
    steps = [
        ('vect', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('crf', MultinomialNB()),
    ]
    classifier.prepare_pipeline(steps)

    classifier.prepare_cv(n_folds)

    # The splitter must report exactly the fold count that was requested.
    self.assertEqual(classifier.kf.n_splits, n_folds)
    self.assertIsNotNone(classifier.unique_labels)
def test_random_search(self):
    """Run a random search over ``crf__alpha`` and check the reported results.

    Builds a ``ClassifierCv`` from ``self.labels`` and ``self.texts`` with a
    three-step pipeline, runs ``perform_random_search`` over five candidate
    ``alpha`` values for the ``'crf'`` step, and then asserts that:

    * ``get_top_random_search_parameters(1)`` returns a ``dict`` with exactly
      one entry;
    * ``print_top_random_search(1)`` returns ``None``.
    """
    cf_cv = ClassifierCv(self.labels, self.texts)
    cf_cv.prepare_pipeline([
        ('vect', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('crf', MultinomialNB()),
    ])

    # The 'crf__' prefix must match the name of the final pipeline step above.
    param_dist = {'crf__alpha': [0.01, 0.02, 0.05, 0.07, 0.09]}
    cf_cv.perform_random_search(param_dist)

    params = cf_cv.get_top_random_search_parameters(1)
    params_print = cf_cv.print_top_random_search(1)

    # assertIsInstance / assertIsNone are the dedicated unittest checks: they
    # avoid an exact ``type(...) ==`` comparison and an ``== None`` equality
    # test, and they produce clearer failure messages.
    self.assertIsInstance(params, dict)
    self.assertEqual(len(params), 1)
    self.assertIsNone(params_print)
def test_prepare_pipeline(self):
    """Check that ``prepare_pipeline`` keeps the final estimator's settings.

    Passes a tf-idf + ``SGDClassifier`` step list to ``prepare_pipeline`` and
    asserts that ``text_clf._final_estimator`` exposes the same ``loss``,
    ``max_iter``, ``penalty`` and ``random_state`` the classifier was
    constructed with.
    """
    classifier = ClassifierCv(self.labels, self.texts)
    steps = [
        ('tfidf', TfidfTransformer()),
        (
            'clf',
            SGDClassifier(
                loss='hinge',
                penalty='l2',
                alpha=1e-3,
                random_state=42,
                max_iter=5,
                tol=None,
            ),
        ),
    ]
    classifier.prepare_pipeline(steps)

    final_estimator = classifier.text_clf._final_estimator

    # Checked in a fixed order so the first mismatch reported is stable.
    expected_settings = (
        ('loss', 'hinge'),
        ('max_iter', 5),
        ('penalty', 'l2'),
        ('random_state', 42),
    )
    for attribute, expected in expected_settings:
        self.assertEqual(getattr(final_estimator, attribute), expected)