Пример #1
0
def test_linearsvx_loss_penalty_deprecations():
    # Fitting LinearSVC / LinearSVR with the legacy loss aliases "l1" and
    # "l2" must emit a FutureWarning that names the replacement loss.
    X, y = [[0.0], [1.0]], [0, 1]

    msg = ("loss='%s' has been deprecated in favor of "
           "loss='%s' as of 0.16. Backward compatibility"
           " for the %s will be removed in %s")

    # Each row: (estimator class, deprecated loss passed to the constructor,
    # arguments substituted into the warning template). Rows are checked in
    # the order listed.
    cases = (
        # LinearSVC: l1 --> hinge, l2 --> squared_hinge
        (svm.LinearSVC, "l1", ("l1", "hinge", "loss='l1'", "0.23")),
        (svm.LinearSVC, "l2", ("l2", "squared_hinge", "loss='l2'", "0.23")),
        # LinearSVR: l1 --> epsilon_insensitive,
        #            l2 --> squared_epsilon_insensitive
        (svm.LinearSVR, "l1",
         ("l1", "epsilon_insensitive", "loss='l1'", "0.23")),
        (svm.LinearSVR, "l2",
         ("l2", "squared_epsilon_insensitive", "loss='l2'", "0.23")),
    )

    for estimator_cls, deprecated_loss, msg_args in cases:
        estimator = estimator_cls(loss=deprecated_loss)
        assert_warns_message(FutureWarning, msg % msg_args,
                             estimator.fit, X, y)
Пример #2
0
def test_linearsvc():
    # Test basic routines using LinearSVC: several penalty / loss / dual
    # combinations must all reproduce true_result on T, and the sign of the
    # decision function must agree with the predicted labels.
    clf = svm.LinearSVC(random_state=0).fit(X, Y)

    # by default should have intercept
    assert clf.fit_intercept

    assert_array_equal(clf.predict(T), true_result)
    assert_array_almost_equal(clf.intercept_, [0], decimal=3)

    # the same with l1 penalty
    clf = svm.LinearSVC(penalty='l1',
                        loss='squared_hinge',
                        dual=False,
                        random_state=0).fit(X, Y)
    assert_array_equal(clf.predict(T), true_result)

    # l2 penalty with dual formulation
    clf = svm.LinearSVC(penalty='l2', dual=True, random_state=0).fit(X, Y)
    assert_array_equal(clf.predict(T), true_result)

    # l2 penalty, hinge loss
    clf = svm.LinearSVC(penalty='l2', loss='hinge', dual=True, random_state=0)
    clf.fit(X, Y)
    assert_array_equal(clf.predict(T), true_result)

    # test also decision function: a positive score maps to label 2 and a
    # non-positive score to label 1.
    dec = clf.decision_function(T)
    # Use the builtin int: the np.int alias was deprecated in NumPy 1.20 and
    # removed in 1.24, where it raises AttributeError.
    res = (dec > 0).astype(int) + 1
    assert_array_equal(res, true_result)
Пример #3
0
def test_linearsvc_fit_sampleweight():
    # Part 1: fitting with sample_weight set to all ones must give the same
    # predictions (and close coefficients) as fitting without sample_weight.
    n_samples = len(X)
    ones = np.ones(n_samples)

    unweighted = svm.LinearSVC(random_state=0)
    unweighted.fit(X, Y)
    ones_weighted = svm.LinearSVC(random_state=0, tol=1e-12, max_iter=1000)
    ones_weighted.fit(X, Y, sample_weight=ones)

    assert_array_equal(ones_weighted.predict(T), unweighted.predict(T))
    assert_allclose(unweighted.coef_, ones_weighted.coef_,
                    rtol=1, atol=0.0001)

    # Part 2: fitting with integer weights [n1, n2, ...] must match fitting
    # on a data set in which sample i is physically repeated n_i times.
    rng = check_random_state(0)
    int_weights = rng.randint(0, 10, n_samples)

    weighted = svm.LinearSVC(random_state=0, tol=1e-12, max_iter=1000)
    weighted.fit(X, Y, sample_weight=int_weights)
    pred_weighted = weighted.predict(T)

    X_repeated = np.repeat(X, int_weights, axis=0)
    y_repeated = np.repeat(Y, int_weights, axis=0)
    repeated = svm.LinearSVC(random_state=0, tol=1e-12, max_iter=1000)
    repeated.fit(X_repeated, y_repeated)
    pred_repeated = repeated.predict(T)

    assert_array_equal(pred_weighted, pred_repeated)
    assert_allclose(weighted.coef_, repeated.coef_, rtol=1, atol=0.0001)
0
def test_linearsvc_parameters():
    # Test possible parameter combinations in LinearSVC: every unsupported
    # (loss, penalty, dual) triple must raise a ValueError naming the
    # offending arguments, and every other triple must fit without error.
    losses = ['hinge', 'squared_hinge', 'logistic_regression', 'foo']
    penalties, duals = ['l1', 'l2', 'bar'], [True, False]

    # Seed the generator so that every run fits on the same data; without a
    # fixed random_state a failure could not be reproduced.
    X, y = make_classification(n_samples=5, n_features=5, random_state=0)

    for loss, penalty, dual in itertools.product(losses, penalties, duals):
        clf = svm.LinearSVC(penalty=penalty, loss=loss, dual=dual)

        # Combinations this test expects to be rejected at fit time.
        unsupported = ((loss, penalty) == ('hinge', 'l1')
                       or (loss, penalty, dual) == ('hinge', 'l2', False)
                       or (penalty, dual) == ('l1', True)
                       or loss == 'foo'
                       or penalty == 'bar')

        if unsupported:
            with pytest.raises(ValueError,
                               match="Unsupported set of "
                               "arguments.*penalty='%s.*loss='%s.*dual=%s" %
                               (penalty, loss, dual)):
                clf.fit(X, y)
        else:
            clf.fit(X, y)

    # Incorrect loss value - test if explicit error message is raised
    with pytest.raises(ValueError, match=".*loss='l3' is not supported.*"):
        svm.LinearSVC(loss="l3").fit(X, y)
Пример #5
0
def test_linear_svx_uppercase_loss_penality_raises_error():
    # Loss and penalty names written with upper-case letters must be
    # rejected with a ValueError when fit is called.
    X, y = [[0.0], [1.0]], [0, 1]

    # mixed-case loss name
    mixed_case_loss = svm.LinearSVC(loss="SQuared_hinge")
    assert_raise_message(ValueError,
                         "loss='SQuared_hinge' is not supported",
                         mixed_case_loss.fit, X, y)

    # upper-case penalty name (loss left at its default)
    upper_case_penalty = svm.LinearSVC(penalty="L2")
    penalty_msg = ("The combination of penalty='L2'"
                   " and loss='squared_hinge' is not supported")
    assert_raise_message(ValueError, penalty_msg,
                         upper_case_penalty.fit, X, y)
Пример #6
0
def test_auto_weight():
    # Test class weights for imbalanced data: class_weight='balanced' must
    # not lower the macro-averaged F1 score on an artificially unbalanced
    # training subset.
    from sklearn_lib.linear_model import LogisticRegression
    from sklearn_lib.utils import compute_class_weight

    # Use the first two iris features only. The targets are shifted by one
    # as a non-regression check: class_weight="balanced" used to work only
    # when the labels were a range [0..K).
    X, y = iris.data[:, :2], iris.target + 1

    # Drop every second sample whose shifted label is greater than 2.
    dropped = np.where(y > 2)[0][::2]
    unbalanced = np.delete(np.arange(y.size), dropped)
    X_train, y_train = X[unbalanced], y[unbalanced]

    classes = np.unique(y_train)
    class_weights = compute_class_weight('balanced', classes, y_train)
    # the thinned-out class must receive the largest weight
    assert np.argmax(class_weights) == 2

    estimators = (svm.SVC(kernel='linear'),
                  svm.LinearSVC(random_state=0),
                  LogisticRegression())
    for clf in estimators:
        # fit once with the estimator's default class weighting ...
        y_pred = clf.fit(X_train, y_train).predict(X)
        # ... and once more with balanced class weights
        clf.set_params(class_weight='balanced')
        y_pred_balanced = clf.fit(X_train, y_train).predict(X)

        plain_f1 = metrics.f1_score(y, y_pred, average='macro')
        balanced_f1 = metrics.f1_score(y, y_pred_balanced, average='macro')
        assert plain_f1 <= balanced_f1
Пример #7
0
def test_linearsvc():
    # Similar to test_SVC: a LinearSVC fitted on X and one fitted on X_sp
    # must end up with matching coefficients, intercepts and predictions.
    # NOTE(review): X_sp / X2_sp are presumably the sparse counterparts of
    # X / X2 -- confirm against the module-level fixtures.
    dense_clf = svm.LinearSVC(random_state=0)
    dense_clf.fit(X, Y)
    sparse_clf = svm.LinearSVC(random_state=0)
    sparse_clf.fit(X_sp, Y)

    assert sparse_clf.fit_intercept

    assert_array_almost_equal(dense_clf.coef_, sparse_clf.coef_, decimal=4)
    assert_array_almost_equal(dense_clf.intercept_, sparse_clf.intercept_,
                              decimal=4)

    dense_pred = dense_clf.predict(X)
    sparse_pred = sparse_clf.predict(X_sp)
    assert_array_almost_equal(dense_pred, sparse_pred)

    # refit both estimators on the second data set and compare again
    dense_clf.fit(X2, Y2)
    sparse_clf.fit(X2_sp, Y2)

    assert_array_almost_equal(dense_clf.coef_, sparse_clf.coef_, decimal=4)
    assert_array_almost_equal(dense_clf.intercept_, sparse_clf.intercept_,
                              decimal=4)
Пример #8
0
def test_linear_svc_intercept_scaling():
    # A non-positive intercept_scaling must make fit raise a ValueError
    # carrying the expected explanatory message.
    for bad_scaling in (-1, 0):
        lsvc = svm.LinearSVC(intercept_scaling=bad_scaling)
        # the message echoes the value stored on the estimator
        template = ('Intercept scaling is %r but needs to be greater than 0.'
                    ' To disable fitting an intercept,'
                    ' set fit_intercept=False.')
        msg = template % lsvc.intercept_scaling
        assert_raise_message(ValueError, msg, lsvc.fit, X, Y)
Пример #9
0
def test_crammer_singer_binary():
    # The Crammer-Singer formulation must reach more than 90% training
    # accuracy on a two-class problem, with and without an intercept.
    X, y = make_classification(n_classes=2, random_state=0)

    for fit_intercept in (True, False):
        clf = svm.LinearSVC(fit_intercept=fit_intercept,
                            multi_class="crammer_singer",
                            random_state=0)
        clf.fit(X, y)
        acc = clf.score(X, y)
        assert acc > 0.9
Пример #10
0
def test_linearsvc_crammer_singer():
    # Compare the default LinearSVC with the crammer_singer multi-class
    # variant on iris, then check the crammer_singer decision function.
    ovr_clf = svm.LinearSVC(random_state=0)
    ovr_clf.fit(iris.data, iris.target)
    cs_clf = svm.LinearSVC(multi_class='crammer_singer', random_state=0)
    cs_clf.fit(iris.data, iris.target)

    # the two models must agree on more than 90% of the samples ...
    same_prediction = ovr_clf.predict(iris.data) == cs_clf.predict(iris.data)
    assert same_prediction.mean() > .9

    # ... while every single coefficient must differ
    assert (ovr_clf.coef_ != cs_clf.coef_).all()

    # predictions are the argmax over the decision function columns
    cs_argmax = np.argmax(cs_clf.decision_function(iris.data), axis=1)
    assert_array_equal(cs_clf.predict(iris.data), cs_argmax)

    # the decision function equals the affine map built from coef_ and
    # intercept_
    dec_func = np.dot(iris.data, cs_clf.coef_.T) + cs_clf.intercept_
    assert_array_almost_equal(dec_func, cs_clf.decision_function(iris.data))
Пример #11
0
def test_liblinear_set_coef():
    # Re-assigning coef_ and intercept_ (with copies of themselves) must
    # leave the decision function unchanged.

    def _check_reassignment(clf, data, compare):
        # Record the decision values, swap in copies of the fitted
        # attributes, then compare the recomputed decision values.
        before = clf.decision_function(data)
        clf.coef_ = clf.coef_.copy()
        clf.intercept_ = clf.intercept_.copy()
        after = clf.decision_function(data)
        compare(before, after)

    # multi-class case
    multi_clf = svm.LinearSVC().fit(iris.data, iris.target)
    _check_reassignment(multi_clf, iris.data, assert_array_almost_equal)

    # binary-class case
    X = [[2, 1], [3, 1], [1, 3], [2, 3]]
    y = [0, 0, 1, 1]
    binary_clf = svm.LinearSVC().fit(X, y)
    _check_reassignment(binary_clf, X, assert_array_equal)
Пример #12
0
def test_linear_svm_convergence_warnings():
    # With max_iter=2 both linear models must emit a ConvergenceWarning
    # during fit and report n_iter_ == 2 afterwards.
    cases = (
        (svm.LinearSVC(random_state=0, max_iter=2), X, Y),
        (svm.LinearSVR(random_state=0, max_iter=2), iris.data, iris.target),
    )
    for estimator, data, target in cases:
        assert_warns(ConvergenceWarning, estimator.fit, data, target)
        assert estimator.n_iter_ == 2
Пример #13
0
def test_linearsvc_iris():
    # LinearSVC must give plausible predictions on iris when trained on the
    # symbolic class names, and must expose those names via classes_.
    target = iris.target_names[iris.target]
    clf = svm.LinearSVC(random_state=0)
    clf.fit(iris.data, target)

    assert set(clf.classes_) == set(iris.target_names)

    training_accuracy = np.mean(clf.predict(iris.data) == target)
    assert training_accuracy > 0.8

    # the highest-scoring decision function column must name the prediction
    dec = clf.decision_function(iris.data)
    best_column = np.argmax(dec, 1)
    pred = iris.target_names[best_column]
    assert_array_equal(pred, clf.predict(iris.data))
Пример #14
0
def test_linearsvc_verbose():
    # Smoke test: fitting with verbose=1 must not fail. File descriptor 1 is
    # pointed at a pipe for the duration of the fit so the verbose output
    # does not reach the real stdout.
    import os

    saved_stdout = os.dup(1)  # save original stdout
    read_fd, write_fd = os.pipe()
    try:
        os.dup2(write_fd, 1)  # replace stdout with the pipe's write end

        # actual call
        clf = svm.LinearSVC(verbose=1)
        clf.fit(X, Y)
    finally:
        # Restore stdout even if fit raises, then release every descriptor
        # opened here. The read end is closed only after stdout has been
        # restored, so nothing is ever written to a pipe without a reader.
        os.dup2(saved_stdout, 1)
        os.close(saved_stdout)
        os.close(write_fd)
        os.close(read_fd)
Пример #15
0
def test_weight():
    # Test class weights on sparse input: with class 0 up-weighted, each
    # classifier must get at least 11 of the 20 held-out samples right.
    X_, y_ = make_classification(n_samples=200,
                                 n_features=100,
                                 weights=[0.833, 0.167],
                                 random_state=0)
    X_ = sparse.csr_matrix(X_)

    # the first 180 rows are used for fitting, the remaining rows for scoring
    X_train, y_train = X_[:180], y_[:180]
    X_test, y_test = X_[180:], y_[180:]

    classifiers = (linear_model.LogisticRegression(),
                   svm.LinearSVC(random_state=0),
                   svm.SVC())
    for clf in classifiers:
        clf.set_params(class_weight={0: 5})
        clf.fit(X_train, y_train)
        n_correct = np.sum(clf.predict(X_test) == y_test)
        assert n_correct >= 11
Пример #16
0
def test_linearsvc_iris():
    # Test the sparse LinearSVC with the iris dataset: a model fitted on
    # iris.data must agree with one fitted on iris.data.toarray().
    dense_data = iris.data.toarray()

    sp_clf = svm.LinearSVC(random_state=0)
    sp_clf.fit(iris.data, iris.target)
    clf = svm.LinearSVC(random_state=0)
    clf.fit(dense_data, iris.target)

    assert clf.fit_intercept == sp_clf.fit_intercept

    assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=1)
    assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=1)
    assert_array_almost_equal(clf.predict(dense_data),
                              sp_clf.predict(iris.data))

    # check decision_function: its argmax must match the dense predictions
    sp_scores = sp_clf.decision_function(iris.data)
    pred = np.argmax(sp_scores, 1)
    assert_array_almost_equal(pred, clf.predict(dense_data))

    # sparsify the coefficients on both models and check that they still
    # produce the same results
    clf.sparsify()
    assert_array_equal(pred, clf.predict(iris.data))
    sp_clf.sparsify()
    assert_array_equal(pred, sp_clf.predict(iris.data))
Пример #17
0
def test_bad_input():
    # Deficient input must raise ValueError, while arrays that are merely
    # non-contiguous must still be accepted.

    # impossible value of C
    with pytest.raises(ValueError):
        svm.SVC(C=-1).fit(X, Y)

    # impossible value of nu
    nu_clf = svm.NuSVC(nu=0.0)
    with pytest.raises(ValueError):
        nu_clf.fit(X, Y)

    # labels one element shorter than the data
    # NOTE(review): this reuses the nu=0.0 estimator, so the ValueError may
    # stem from nu rather than from the length mismatch -- confirm.
    short_labels = Y[:-1]
    with pytest.raises(ValueError):
        nu_clf.fit(X, short_labels)

    # Test with arrays that are non-contiguous.
    for estimator in (svm.SVC(), svm.LinearSVC(random_state=0)):
        Xf = np.asfortranarray(X)
        assert not Xf.flags['C_CONTIGUOUS']
        # taking one column of a C-contiguous 2-D array yields a strided view
        yf = np.ascontiguousarray(np.tile(Y, (2, 1)).T)[:, -1]
        assert not yf.flags['F_CONTIGUOUS']
        assert not yf.flags['C_CONTIGUOUS']
        estimator.fit(Xf, yf)
        assert_array_equal(estimator.predict(T), true_result)

    # error for precomputed kernels when fitted directly on X
    precomputed_clf = svm.SVC(kernel='precomputed')
    with pytest.raises(ValueError):
        precomputed_clf.fit(X, Y)

    # sample_weight bad dimensions
    weighted_clf = svm.SVC()
    with pytest.raises(ValueError):
        weighted_clf.fit(X, Y, sample_weight=range(len(X) - 1))

    # predict with sparse input when trained with dense
    dense_clf = svm.SVC().fit(X, Y)
    with pytest.raises(ValueError):
        dense_clf.predict(sparse.lil_matrix(X))

    # refit on the product X . X^T, then predicting on X itself must fail
    Xt = np.array(X).T
    dense_clf.fit(np.dot(X, Xt), Y)
    with pytest.raises(ValueError):
        dense_clf.predict(X)

    # predicting on the transposed data after fitting on X must fail
    fresh_clf = svm.SVC()
    fresh_clf.fit(X, Y)
    with pytest.raises(ValueError):
        fresh_clf.predict(Xt)
Пример #18
0
def test_weight():
    # Test class weights.

    # Give class 1 a small weight, so that every training sample ends up
    # predicted as class 2.
    down_weighted = svm.SVC(class_weight={1: 0.1})
    down_weighted.fit(X, Y)
    assert_array_almost_equal(down_weighted.predict(X), [2] * 6)

    X_, y_ = make_classification(n_samples=200,
                                 n_features=10,
                                 weights=[0.833, 0.167],
                                 random_state=2)
    # first half for fitting, second half for scoring
    X_train, y_train = X_[:100], y_[:100]
    X_test, y_test = X_[100:], y_[100:]

    classifiers = (linear_model.LogisticRegression(),
                   svm.LinearSVC(random_state=0),
                   svm.SVC())
    for clf in classifiers:
        clf.set_params(class_weight={0: .1, 1: 10})
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        assert f1_score(y_test, y_pred) > .3
Пример #19
0
def test_lsvc_intercept_scaling_zero():
    # With fit_intercept=False the fitted intercept_ must equal zero.
    no_intercept_clf = svm.LinearSVC(fit_intercept=False)
    no_intercept_clf.fit(X, Y)
    assert no_intercept_clf.intercept_ == 0.