def test_ovr_multinomial_iris():
    # Test that OvR and multinomial are correct using the iris dataset.
    train, target = iris.data, iris.target
    n_samples, n_features = train.shape

    # The cv indices from stratified kfold (where stratification is done
    # based on the fine-grained iris classes, i.e., before the classes 0
    # and 1 are conflated) are used for both clf and clf1
    n_cv = 2
    cv = StratifiedKFold(n_cv)
    precomputed_folds = list(cv.split(train, target))

    # Train clf on the original dataset where classes 0 and 1 are separated
    clf = LogisticRegressionCV(cv=precomputed_folds)
    clf.fit(train, target)

    # Conflate classes 0 and 1 and train clf1 on this modified dataset
    clf1 = LogisticRegressionCV(cv=precomputed_folds)
    target_copy = target.copy()
    target_copy[target_copy == 0] = 1
    clf1.fit(train, target_copy)

    # Ensure that what OvR learns for class 2 is the same regardless of
    # whether classes 0 and 1 are separated or not
    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Test the shape of various attributes.
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    coefs_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert_array_almost_equal(coefs_paths.shape,
                              (3, n_cv, 10, n_features + 1))
    assert_equal(clf.Cs_.shape, (10,))
    scores = np.asarray(list(clf.scores_.values()))
    assert_equal(scores.shape, (3, n_cv, 10))

    # Test that for the iris data multinomial gives a better accuracy than
    # OvR
    for solver in ['lbfgs', 'newton-cg', 'sag']:
        max_iter = 100 if solver == 'sag' else 15
        clf_multi = LogisticRegressionCV(
            solver=solver, multi_class='multinomial', max_iter=max_iter,
            random_state=42, tol=1e-2, cv=2)
        clf_multi.fit(train, target)
        multi_score = clf_multi.score(train, target)
        ovr_score = clf.score(train, target)
        assert_greater(multi_score, ovr_score)

        # Test attributes of LogisticRegressionCV
        assert_equal(clf.coef_.shape, clf_multi.coef_.shape)
        assert_array_equal(clf_multi.classes_, [0, 1, 2])
        coefs_paths = np.asarray(list(clf_multi.coefs_paths_.values()))
        assert_array_almost_equal(coefs_paths.shape,
                                  (3, n_cv, 10, n_features + 1))
        assert_equal(clf_multi.Cs_.shape, (10,))
        scores = np.asarray(list(clf_multi.scores_.values()))
        assert_equal(scores.shape, (3, n_cv, 10))
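# Why the invariance checked above holds: OvR reduces the multiclass problem
# to one independent binary fit per class against the indicator (y == class),
# so merging classes 0 and 1 leaves the class-2 sub-problem untouched. A
# minimal sketch of that equivalence (not part of the original suite; the
# helper name is hypothetical, and multi_class='ovr' assumes a scikit-learn
# version that still accepts that parameter):
def _sketch_ovr_class2_invariance():
    from sklearn.linear_model import LogisticRegression

    # Full OvR fit on all three iris classes.
    ovr = LogisticRegression(multi_class='ovr', max_iter=1000)
    ovr.fit(iris.data, iris.target)

    # Standalone binary fit on the class-2 indicator only.
    binary = LogisticRegression(max_iter=1000)
    binary.fit(iris.data, (iris.target == 2).astype(int))

    # The class-2 row of the OvR model solves the same optimization problem
    # as the standalone binary fit, so the coefficients agree up to solver
    # tolerance.
    assert np.allclose(ovr.coef_[2], binary.coef_[0], atol=1e-3)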
def test_logistic_cv_score_does_not_warn_by_default():
    lr = LogisticRegressionCV(cv=2)
    lr.fit(X, Y1)

    with pytest.warns(None) as record:
        lr.score(X, lr.predict(X))
    assert len(record) == 0
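# Note: pytest.warns(None) was deprecated in pytest 6.2 and is rejected from
# pytest 7 onwards. A minimal sketch of the same "no warning is raised" check
# using only the standard library (the helper name is hypothetical; X and Y1
# are the module-level fixtures used above):
def _sketch_score_does_not_warn():
    import warnings

    lr = LogisticRegressionCV(cv=2)
    lr.fit(X, Y1)
    # Record every warning emitted during scoring and assert there are none.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        lr.score(X, lr.predict(X))
    assert len(record) == 0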
def test_ovr_multinomial_iris():
    # Test that OvR and multinomial are correct using the iris dataset.
    train, target = iris.data, iris.target
    n_samples, n_features = train.shape

    # The cv indices from stratified kfold (where stratification is done
    # based on the fine-grained iris classes, i.e., before the classes 0
    # and 1 are conflated) are used for both clf and clf1
    n_cv = 2
    cv = StratifiedKFold(n_cv)
    precomputed_folds = list(cv.split(train, target))

    # Train clf on the original dataset where classes 0 and 1 are separated
    clf = LogisticRegressionCV(cv=precomputed_folds)
    clf.fit(train, target)

    # Conflate classes 0 and 1 and train clf1 on this modified dataset
    clf1 = LogisticRegressionCV(cv=precomputed_folds)
    target_copy = target.copy()
    target_copy[target_copy == 0] = 1
    clf1.fit(train, target_copy)

    # Ensure that what OvR learns for class 2 is the same regardless of
    # whether classes 0 and 1 are separated or not
    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Test the shape of various attributes.
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    coefs_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert_array_almost_equal(coefs_paths.shape,
                              (3, n_cv, 10, n_features + 1))
    assert_equal(clf.Cs_.shape, (10,))
    scores = np.asarray(list(clf.scores_.values()))
    assert_equal(scores.shape, (3, n_cv, 10))

    # Test that for the iris data multinomial gives a better accuracy than
    # OvR
    for solver in ['lbfgs', 'newton-cg', 'sag', 'saga']:
        max_iter = 2000 if solver in ['sag', 'saga'] else 15
        clf_multi = LogisticRegressionCV(
            solver=solver, multi_class='multinomial', max_iter=max_iter,
            random_state=42, tol=1e-5 if solver in ['sag', 'saga'] else 1e-2,
            cv=2)
        clf_multi.fit(train, target)
        multi_score = clf_multi.score(train, target)
        ovr_score = clf.score(train, target)
        assert_greater(multi_score, ovr_score)

        # Test attributes of LogisticRegressionCV
        assert_equal(clf.coef_.shape, clf_multi.coef_.shape)
        assert_array_equal(clf_multi.classes_, [0, 1, 2])
        coefs_paths = np.asarray(list(clf_multi.coefs_paths_.values()))
        assert_array_almost_equal(coefs_paths.shape,
                                  (3, n_cv, 10, n_features + 1))
        assert_equal(clf_multi.Cs_.shape, (10,))
        scores = np.asarray(list(clf_multi.scores_.values()))
        assert_equal(scores.shape, (3, n_cv, 10))
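# The sag/saga cases above are given a much larger iteration budget than the
# other solvers because sag and saga are only guaranteed to converge quickly
# when the features have roughly the same scale, which the raw iris features
# do not. A sketch of the usual remedy, standardizing before fitting (the
# helper name and max_iter value are illustrative, not from the original
# test):
def _sketch_saga_with_scaling():
    from sklearn.preprocessing import StandardScaler

    # With standardized features, saga converges in far fewer iterations.
    train_scaled = StandardScaler().fit_transform(iris.data)
    clf = LogisticRegressionCV(solver='saga', multi_class='multinomial',
                               max_iter=200, random_state=42, cv=2)
    clf.fit(train_scaled, iris.target)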
def test_logistic_cv_mock_scorer():

    class MockScorer(object):
        def __init__(self):
            self.calls = 0
            self.scores = [0.1, 0.4, 0.8, 0.5]

        def __call__(self, model, X, y, sample_weight=None):
            score = self.scores[self.calls % len(self.scores)]
            self.calls += 1
            return score

    mock_scorer = MockScorer()
    Cs = [1, 2, 3, 4]
    cv = 2

    lr = LogisticRegressionCV(Cs=Cs, scoring=mock_scorer, cv=cv)
    lr.fit(X, Y1)

    # Cs[2] has the highest score (0.8) from MockScorer
    assert lr.C_[0] == Cs[2]

    # scorer called 8 times (cv * len(Cs))
    assert mock_scorer.calls == cv * len(Cs)

    # reset mock_scorer
    mock_scorer.calls = 0
    with pytest.warns(ChangedBehaviorWarning):
        custom_score = lr.score(X, lr.predict(X))

    assert custom_score == mock_scorer.scores[0]
    assert mock_scorer.calls == 1
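# Outside of tests, a custom scorer is usually built from a metric with
# sklearn.metrics.make_scorer rather than hand-rolled as above; make_scorer
# produces a callable with the same (estimator, X, y) signature the mock
# imitates. A minimal sketch (the helper name is hypothetical;
# balanced_accuracy_score requires scikit-learn >= 0.20):
def _sketch_make_scorer_usage():
    from sklearn.metrics import balanced_accuracy_score, make_scorer

    scorer = make_scorer(balanced_accuracy_score)
    lr = LogisticRegressionCV(Cs=[1, 2, 3, 4], scoring=scorer, cv=2,
                              max_iter=200)
    lr.fit(iris.data, iris.target)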
def test_ovr_multinomial_iris():
    # Test that OvR and multinomial are correct using the iris dataset.
    train, target = iris.data, iris.target
    n_samples, n_features = train.shape

    # Use pre-computed folds: the folds StratifiedKFold would generate
    # differ between the original y and the conflated y used below
    cv = StratifiedKFold(target, 3)
    clf = LogisticRegressionCV(cv=cv)
    clf.fit(train, target)

    clf1 = LogisticRegressionCV(cv=cv)
    target_copy = target.copy()
    target_copy[target_copy == 0] = 1
    clf1.fit(train, target_copy)

    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Test the shape of various attributes.
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    coefs_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert_array_almost_equal(coefs_paths.shape, (3, 3, 10, n_features + 1))
    assert_equal(clf.Cs_.shape, (10,))
    scores = np.asarray(list(clf.scores_.values()))
    assert_equal(scores.shape, (3, 3, 10))

    # Test that for the iris data multinomial gives a better accuracy than
    # OvR
    for solver in ['lbfgs', 'newton-cg']:
        clf_multi = LogisticRegressionCV(
            solver=solver, multi_class='multinomial', max_iter=15)
        clf_multi.fit(train, target)
        multi_score = clf_multi.score(train, target)
        ovr_score = clf.score(train, target)
        assert_greater(multi_score, ovr_score)

        # Test attributes of LogisticRegressionCV
        assert_equal(clf.coef_.shape, clf_multi.coef_.shape)
        assert_array_equal(clf_multi.classes_, [0, 1, 2])
        coefs_paths = np.asarray(list(clf_multi.coefs_paths_.values()))
        assert_array_almost_equal(coefs_paths.shape,
                                  (3, 3, 10, n_features + 1))
        assert_equal(clf_multi.Cs_.shape, (10,))
        scores = np.asarray(list(clf_multi.scores_.values()))
        assert_equal(scores.shape, (3, 3, 10))
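# StratifiedKFold(target, 3) above is the pre-0.18 sklearn.cross_validation
# signature, where the splitter is constructed from y directly and is itself
# an iterable of (train, test) index pairs. A sketch of the modern equivalent
# (sklearn.model_selection, scikit-learn >= 0.18; the helper name is
# hypothetical):
def _sketch_modern_precomputed_folds():
    from sklearn.model_selection import StratifiedKFold

    # Materialize the folds once so the same splits serve both classifiers.
    folds = list(StratifiedKFold(n_splits=3).split(iris.data, iris.target))
    clf = LogisticRegressionCV(cv=folds)
    clf.fit(iris.data, iris.target)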