Пример #1
0
def test_decision_function_shape():
    """Check ``decision_function`` output shapes for 'ovr' and 'ovo'.

    With ``decision_function_shape='ovr'`` the output is expected to have
    one column per class and its row-wise argmax must agree with
    ``predict``.  With 'ovo' only the output shape is checked.
    """
    linear_params = dict(kernel='linear', C=0.1)

    # iris: three score columns are expected
    ovr_iris = svm.SVC(decision_function_shape='ovr',
                       **linear_params).fit(iris.data, iris.target)
    iris_scores = ovr_iris.decision_function(iris.data)
    assert iris_scores.shape == (len(iris.data), 3)
    assert_array_equal(ovr_iris.predict(iris.data),
                       np.argmax(iris_scores, axis=1))

    # blobs with five classes
    X, y = make_blobs(n_samples=80, centers=5, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    ovr_blobs = svm.SVC(decision_function_shape='ovr',
                        **linear_params).fit(X_train, y_train)
    blob_scores = ovr_blobs.decision_function(X_test)
    assert blob_scores.shape == (len(X_test), 5)
    assert_array_equal(ovr_blobs.predict(X_test),
                       np.argmax(blob_scores, axis=1))

    # check the shape for decision_function_shape='ovo': the test expects
    # 10 columns for five classes (5 * 4 / 2 class pairs)
    ovo_blobs = svm.SVC(decision_function_shape='ovo',
                        **linear_params).fit(X_train, y_train)
    pair_scores = ovo_blobs.decision_function(X_train)
    assert pair_scores.shape == (len(X_train), 10)
Пример #2
0
def test_decision_function():
    """Compare ``decision_function`` with a manual NumPy computation.

    Sanity check that the decision function recomputed in Python from the
    fitted attributes matches the value returned by the estimator.  Three
    cases are covered: multi-class linear, binary linear and binary RBF.
    """
    # multi class:
    clf = svm.SVC(kernel='linear', C=0.1,
                  decision_function_shape='ovo').fit(iris.data, iris.target)

    dec = np.dot(iris.data, clf.coef_.T) + clf.intercept_

    assert_array_almost_equal(dec, clf.decision_function(iris.data))

    # binary:
    clf.fit(X, Y)
    dec = np.dot(X, clf.coef_.T) + clf.intercept_
    prediction = clf.predict(X)
    assert_array_almost_equal(dec.ravel(), clf.decision_function(X))
    # A positive score indexes classes_[1], any other score classes_[0].
    # The builtin ``int`` replaces the ``np.int`` alias, which was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
    assert_array_almost_equal(
        prediction,
        clf.classes_[(clf.decision_function(X) > 0).astype(int)])
    expected = np.array([-1., -0.66, -1., 0.66, 1., 1.])
    assert_array_almost_equal(clf.decision_function(X), expected, 2)

    # kernel binary:
    clf = svm.SVC(kernel='rbf', gamma=1, decision_function_shape='ovo')
    clf.fit(X, Y)

    rbfs = rbf_kernel(X, clf.support_vectors_, gamma=clf.gamma)
    dec = np.dot(rbfs, clf.dual_coef_.T) + clf.intercept_
    assert_array_almost_equal(dec.ravel(), clf.decision_function(X))
Пример #3
0
def test_svc_clone_with_callable_kernel():
    """Check a cloned SVC with a callable linear kernel against 'linear'.

    The estimator built with a lambda kernel is cloned, fitted on iris and
    compared with an estimator using the built-in linear kernel.
    """
    shared_params = dict(probability=True,
                         random_state=0,
                         decision_function_shape='ovr')

    callable_svc = svm.SVC(kernel=lambda x, y: np.dot(x, y.T),
                           **shared_params)
    # cloning also verifies that lambda kernels survive ``clone``
    cloned_svc = base.clone(callable_svc)
    cloned_svc.fit(iris.data, iris.target)

    builtin_svc = svm.SVC(kernel='linear', **shared_params)
    builtin_svc.fit(iris.data, iris.target)

    # fitted attributes
    assert_array_almost_equal(cloned_svc.dual_coef_, builtin_svc.dual_coef_)
    assert_array_almost_equal(cloned_svc.intercept_, builtin_svc.intercept_)

    # predictions, probabilities and decision values
    assert_array_equal(cloned_svc.predict(iris.data),
                       builtin_svc.predict(iris.data))
    assert_array_almost_equal(cloned_svc.predict_proba(iris.data),
                              builtin_svc.predict_proba(iris.data),
                              decimal=4)
    assert_array_almost_equal(cloned_svc.decision_function(iris.data),
                              builtin_svc.decision_function(iris.data))
Пример #4
0
def test_svc_with_custom_kernel():
    """Check that a callable kernel predicts like the built-in 'linear' one."""

    def linear_gram(x, y):
        # Gram matrix computed with safe_sparse_dot
        return safe_sparse_dot(x, y.T)

    builtin_svc = svm.SVC(kernel='linear').fit(X_sp, Y)
    custom_svc = svm.SVC(kernel=linear_gram).fit(X_sp, Y)

    builtin_pred = builtin_svc.predict(X_sp)
    custom_pred = custom_svc.predict(X_sp)
    assert_array_equal(builtin_pred, custom_pred)
Пример #5
0
def test_sparse_realdata():
    """Compare dense and sparse linear SVC fits on a small sparse dataset.

    The data is a subset of the 20newsgroups dataset.  The test catches
    bugs where the input is not correctly converted to sparse format or
    weights are not correctly initialized.
    """
    # CSR components of the feature matrix
    data = np.array([0.03771744, 0.1003567, 0.01174647, 0.027069])
    indices = np.array([6, 5, 35, 31])
    indptr = np.array([
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 4, 4, 4
    ])
    X = sparse.csr_matrix((data, indices, indptr))
    y = np.array([
        1., 0., 2., 2., 1., 1., 1., 2., 2., 0., 1., 2., 2., 0., 2., 0., 3., 0.,
        3., 0., 1., 1., 3., 2., 3., 2., 0., 3., 1., 0., 2., 1., 2., 0., 1., 0.,
        2., 3., 1., 3., 0., 1., 0., 0., 2., 0., 1., 2., 2., 2., 3., 2., 0., 3.,
        2., 1., 2., 3., 2., 2., 0., 1., 0., 1., 2., 3., 0., 0., 2., 2., 1., 3.,
        1., 1., 0., 1., 2., 1., 1., 3.
    ])

    # fit once on the dense array and once on a COO version of the data
    dense_svc = svm.SVC(kernel='linear').fit(X.toarray(), y)
    sparse_svc = svm.SVC(kernel='linear').fit(sparse.coo_matrix(X), y)

    assert_array_equal(dense_svc.support_vectors_,
                       sparse_svc.support_vectors_.toarray())
    assert_array_equal(dense_svc.dual_coef_,
                       sparse_svc.dual_coef_.toarray())
Пример #6
0
def test_consistent_proba():
    """Check that two identically seeded fits give the same probabilities."""
    probas = []
    for _ in range(2):
        estimator = svm.SVC(probability=True, max_iter=1, random_state=0)
        # max_iter=1 stops the solver early; the warning is silenced here
        with ignore_warnings(category=ConvergenceWarning):
            probas.append(estimator.fit(X, Y).predict_proba(X))
    first, second = probas
    assert_array_almost_equal(first, second)
Пример #7
0
def test_pairwise_attribute():
    """Check ``_pairwise`` on multiclass wrappers around SVC.

    The attribute must be truthy only when the wrapped estimator uses
    ``kernel='precomputed'``.
    """
    precomputed_svc = svm.SVC(kernel='precomputed')
    default_svc = svm.SVC()

    for wrapper_cls in (OneVsRestClassifier, OneVsOneClassifier):
        plain_wrapper = wrapper_cls(default_svc)
        assert not plain_wrapper._pairwise

        pairwise_wrapper = wrapper_cls(precomputed_svc)
        assert pairwise_wrapper._pairwise
Пример #8
0
def test_svc_iris():
    """Compare SVC fitted on ``iris.data`` and on its ``toarray()`` version.

    Runs for the 'linear', 'poly' and 'rbf' kernels; ``coef_`` is compared
    for the linear kernel only.
    """
    for kernel_name in ('linear', 'poly', 'rbf'):
        sparse_svc = svm.SVC(kernel=kernel_name).fit(iris.data, iris.target)
        dense_svc = svm.SVC(kernel=kernel_name).fit(iris.data.toarray(),
                                                    iris.target)

        assert_array_almost_equal(dense_svc.support_vectors_,
                                  sparse_svc.support_vectors_.toarray())
        assert_array_almost_equal(dense_svc.dual_coef_,
                                  sparse_svc.dual_coef_.toarray())
        assert_array_almost_equal(dense_svc.predict(iris.data.toarray()),
                                  sparse_svc.predict(iris.data))
        if kernel_name != 'linear':
            continue
        assert_array_almost_equal(dense_svc.coef_, sparse_svc.coef_.toarray())
Пример #9
0
def test_pairwise_cross_val_score():
    """Check cross-validation scores with a precomputed linear kernel.

    For each multiclass wrapper, scores obtained from a precomputed Gram
    matrix ``X . X^T`` must equal the scores obtained with
    ``kernel='linear'`` on the raw features.
    """
    clf_precomputed = svm.SVC(kernel='precomputed')
    clf_notprecomputed = svm.SVC(kernel='linear')

    X, y = iris.data, iris.target

    # The Gram matrix does not depend on the wrapper class, so it is
    # computed once instead of on every loop iteration.
    gram = np.dot(X, X.T)

    for MultiClassClassifier in [OneVsRestClassifier, OneVsOneClassifier]:
        ovr_false = MultiClassClassifier(clf_notprecomputed)
        ovr_true = MultiClassClassifier(clf_precomputed)

        score_precomputed = cross_val_score(ovr_true, gram, y)
        score_linear = cross_val_score(ovr_false, X, y)
        assert_array_equal(score_precomputed, score_linear)
Пример #10
0
def test_auto_weight():
    """Test ``class_weight='balanced'`` on imbalanced data.

    For each classifier, the macro F1 score obtained with balanced class
    weights must be at least as good as the score without them.
    """
    from mrex.linear_model import LogisticRegression
    # We take as dataset the two-dimensional projection of iris so
    # that it is not separable and remove half of predictors from
    # class 1.
    # We add one to the targets as a non-regression test:
    # class_weight="balanced"
    # used to work only when the labels were a range [0..K).
    from mrex.utils import compute_class_weight
    X, y = iris.data[:, :2], iris.target + 1
    unbalanced = np.delete(np.arange(y.size), np.where(y > 2)[0][::2])

    classes = np.unique(y[unbalanced])
    # Keyword arguments keep this call valid on APIs that no longer accept
    # ``classes`` and ``y`` positionally.
    class_weights = compute_class_weight('balanced', classes=classes,
                                         y=y[unbalanced])
    assert np.argmax(class_weights) == 2

    for clf in (svm.SVC(kernel='linear'), svm.LinearSVC(random_state=0),
                LogisticRegression()):
        # check that score is better when class='balanced' is set.
        y_pred = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
        clf.set_params(class_weight='balanced')
        y_pred_balanced = clf.fit(
            X[unbalanced],
            y[unbalanced],
        ).predict(X)
        assert (metrics.f1_score(y, y_pred, average='macro') <=
                metrics.f1_score(y, y_pred_balanced, average='macro'))
Пример #11
0
    def fit(self):
        """Fit an SVM on ``self.model.data`` and publish its decision surface.

        The first two columns of the data are used as features and the
        third as labels.  A ``OneClassSVM`` is fitted when only one distinct
        label is present, an ``SVC`` otherwise.  The fitted estimator and
        the surface are stored on ``self.model``, which is then notified
        with ``changed("surface")``.
        """
        print("fit the model")
        samples = np.array(self.model.data)
        X, y = samples[:, 0:2], samples[:, 2]

        # hyper-parameters are read from the controller's variables
        C = float(self.complexity.get())
        gamma = float(self.gamma.get())
        coef0 = float(self.coef0.get())
        degree = int(self.degree.get())
        kernel_names = {0: "linear", 1: "rbf", 2: "poly"}

        single_class = len(np.unique(y)) == 1
        shared_params = dict(kernel=kernel_names[self.kernel.get()],
                             gamma=gamma, coef0=coef0, degree=degree)
        if single_class:
            clf = svm.OneClassSVM(**shared_params)
            clf.fit(X)
        else:
            clf = svm.SVC(C=C, **shared_params)
            clf.fit(X, y)

        # only report accuracy for estimators that provide ``score``
        if hasattr(clf, 'score'):
            print("Accuracy:", clf.score(X, y) * 100)

        surface = self.decision_surface(clf)
        X1, X2, Z = surface
        self.model.clf = clf
        self.model.set_surface((X1, X2, Z))
        self.model.surface_type = self.surface_type.get()
        self.fitted = True
        self.model.changed("surface")
Пример #12
0
def test_sparse_precomputed():
    """Check that fitting a precomputed-kernel SVC on sparse input fails.

    A ``TypeError`` whose message contains "Sparse precomputed" is expected.
    """
    precomputed_svc = svm.SVC(kernel='precomputed')
    gram = sparse.csr_matrix([[1, 0], [0, 1]])
    try:
        precomputed_svc.fit(gram, [0, 1])
    except TypeError as err:
        assert "Sparse precomputed" in str(err)
    else:
        # fit must not succeed on a sparse Gram matrix
        assert not "reached"
Пример #13
0
def test_timeout():
    """Check that ``max_iter=1`` makes ``fit`` emit a ConvergenceWarning."""
    params = dict(C=1,
                  probability=True,
                  random_state=0,
                  max_iter=1)
    limited_svc = svm.SVC(kernel=lambda x, y: x * y.T, **params)

    assert_warns(ConvergenceWarning, limited_svc.fit, X_sp, Y)
Пример #14
0
def test_libsvm_parameters():
    """Check the fitted attributes of a linear SVC on the toy data."""
    fitted = svm.SVC(kernel='linear').fit(X, Y)

    # samples 1 and 3 are expected to be the support vectors
    expected_support = [1, 3]
    assert_array_equal(fitted.dual_coef_, [[-0.25, .25]])
    assert_array_equal(fitted.support_, expected_support)
    assert_array_equal(fitted.support_vectors_, (X[1], X[3]))
    assert_array_equal(fitted.intercept_, [0.])
    assert_array_equal(fitted.predict(X), Y)
Пример #15
0
def test_error():
    """Check that deficient input raises ``ValueError``.

    Finishes with a sanity check that a default SVC fitted on the sparse
    toy data predicts ``true_result`` for ``T``.
    """
    # impossible value of C
    with pytest.raises(ValueError):
        svm.SVC(C=-1).fit(X, Y)

    # impossible value of nu
    nu_svc = svm.NuSVC(nu=0.0)
    with pytest.raises(ValueError):
        nu_svc.fit(X_sp, Y)

    # labels with one element less than there are samples
    short_labels = Y[:-1]
    with pytest.raises(ValueError):
        nu_svc.fit(X_sp, short_labels)

    default_svc = svm.SVC()
    default_svc.fit(X_sp, Y)
    assert_array_equal(default_svc.predict(T), true_result)
Пример #16
0
def test_sample_weights():
    """Test per-sample weights passed to ``SVC.fit``."""
    # TODO: check on NuSVR, OneClass, etc.
    probe = [X[2]]

    weighted_svc = svm.SVC()
    weighted_svc.fit(X, Y)
    assert_array_equal(weighted_svc.predict(probe), [1.])

    # heavily weighting the last three samples flips the prediction
    weights = [.1] * 3 + [10] * 3
    weighted_svc.fit(X, Y, sample_weight=weights)
    assert_array_equal(weighted_svc.predict(probe), [2.])

    # test that rescaling all samples is the same as changing C
    rescaled_svc = svm.SVC()
    rescaled_svc.fit(X, Y)
    reference_dual_coef = rescaled_svc.dual_coef_
    rescaled_svc.set_params(C=100)
    uniform_weights = np.repeat(0.01, len(X))
    rescaled_svc.fit(X, Y, sample_weight=uniform_weights)
    assert_array_almost_equal(reference_dual_coef, rescaled_svc.dual_coef_)
Пример #17
0
def test_ovr_single_label_decision_function():
    """Check that the sign of the OvR decision values matches ``predict``."""
    features, labels = datasets.make_classification(n_samples=100,
                                                    n_features=20,
                                                    random_state=0)
    # first 80 samples for training, the rest for testing
    train_features, train_labels = features[:80], labels[:80]
    test_features = features[80:]

    ovr = OneVsRestClassifier(svm.SVC()).fit(train_features, train_labels)
    assert_array_equal(
        ovr.decision_function(test_features).ravel() > 0,
        ovr.predict(test_features))
Пример #18
0
def test_sample_weights():
    """Test per-sample weights when fitting SVC on the sparse toy data."""
    probe = [X[2]]
    sparse_svc = svm.SVC()

    sparse_svc.fit(X_sp, Y)
    assert_array_equal(sparse_svc.predict(probe), [1.])

    # heavily weighting the last three samples flips the prediction
    weights = [.1] * 3 + [10] * 3
    sparse_svc.fit(X_sp, Y, sample_weight=weights)
    assert_array_equal(sparse_svc.predict(probe), [2.])
Пример #19
0
def test_weight():
    """Test the ``class_weight`` parameter on the toy and generated data."""
    # class 1 gets a small weight ...
    down_weighted = svm.SVC(class_weight={1: 0.1})
    down_weighted.fit(X, Y)
    # ... so all predicted values belong to class 2
    assert_array_almost_equal(down_weighted.predict(X), [2] * 6)

    X_, y_ = make_classification(n_samples=200,
                                 n_features=10,
                                 weights=[0.833, 0.167],
                                 random_state=2)
    X_fit, y_fit = X_[:100], y_[:100]
    X_eval, y_eval = X_[100:], y_[100:]

    estimators = (linear_model.LogisticRegression(),
                  svm.LinearSVC(random_state=0), svm.SVC())
    for estimator in estimators:
        estimator.set_params(class_weight={0: .1, 1: 10})
        estimator.fit(X_fit, y_fit)
        predicted = estimator.predict(X_eval)
        assert f1_score(y_eval, predicted) > .3
Пример #20
0
def test_unicode_kernel():
    """Test that a unicode kernel name does not cause a TypeError."""
    kernel_name = 'linear'

    proba_svc = svm.SVC(kernel=kernel_name, probability=True)
    proba_svc.fit(X, Y)
    proba_svc.predict_proba(T)

    # same kernel name through the low-level cross-validation routine
    float_targets = iris.target.astype(np.float64)
    svm.libsvm.cross_validation(iris.data, float_targets, 5,
                                kernel=kernel_name, random_seed=0)
Пример #21
0
def test_unfitted():
    """Check that ``predict`` on an unfitted estimator raises.

    The error message must name the estimator class and say it is not
    fitted.
    """
    bogus_input = "foo!"  # input validation not required when SVM not fitted

    cases = (
        (svm.SVC, r".*\bSVC\b.*\bnot\b.*\bfitted\b"),
        (svm.NuSVR, r".*\bNuSVR\b.*\bnot\b.*\bfitted\b"),
    )
    for estimator_cls, pattern in cases:
        unfitted = estimator_cls()
        with pytest.raises(Exception, match=pattern):
            unfitted.predict(bogus_input)
Пример #22
0
def test_sparse_svc_clone_with_callable_kernel():
    """Check a cloned callable-kernel SVC on sparse input against dense.

    Tests that the "dense_fit" is called even though sparse input is used,
    meaning that everything works fine.
    """
    shared_params = dict(C=1, probability=True, random_state=0)

    template = svm.SVC(kernel=lambda x, y: x * y.T, **shared_params)
    sparse_clone = base.clone(template)

    sparse_clone.fit(X_sp, Y)
    sparse_pred = sparse_clone.predict(X_sp)
    sparse_clone.predict_proba(X_sp)

    dense_svc = svm.SVC(kernel=lambda x, y: np.dot(x, y.T), **shared_params)
    dense_pred = dense_svc.fit(X, Y).predict(X)
    assert_array_equal(dense_pred, sparse_pred)
Пример #23
0
def test_gamma_scale():
    """Check the default gamma and that fitting emits no warnings."""
    default_svc = svm.SVC()

    samples, targets = [[0.], [1.]], [0, 1]
    assert_no_warnings(default_svc.fit, samples, targets)
    assert_almost_equal(default_svc._gamma, 4)

    # X_var ~= 1 shouldn't raise warning, for when
    # gamma is not explicitly set.
    samples = [[1, 2], [3, 2 * np.sqrt(6) / 3 + 2]]
    targets = [0, 1]
    assert_no_warnings(default_svc.fit, samples, targets)
Пример #24
0
def test_hasattr_predict_proba():
    """Check that ``predict_proba`` availability follows ``probability``.

    The method must be (un)available both before and after ``fit``,
    switched by the ``probability`` parameter.
    """
    method = 'predict_proba'

    with_proba = svm.SVC(probability=True)
    assert hasattr(with_proba, method)
    with_proba.fit(iris.data, iris.target)
    assert hasattr(with_proba, method)

    without_proba = svm.SVC(probability=False)
    assert not hasattr(without_proba, method)
    without_proba.fit(iris.data, iris.target)
    assert not hasattr(without_proba, method)

    # Switching to `probability=True` after fitting should make
    # predict_proba available, but calling it must not work:
    without_proba.probability = True
    assert hasattr(without_proba, method)
    expected_message = (
        "predict_proba is not available when fitted with probability=False")
    assert_raise_message(NotFittedError, expected_message,
                         without_proba.predict_proba, iris.data)
Пример #25
0
def test_unsorted_indices():
    """Check that sorted and unsorted CSR indices give the same results.

    A subset of digits is used because iris, blobs or make_classification
    did not show the problem.
    """

    def scramble_indices(mat):
        # reverse the stored entries of every row, which leaves the matrix
        # values unchanged but the column indices unsorted
        reversed_data = []
        reversed_indices = []
        for start, stop in zip(mat.indptr[:-1], mat.indptr[1:]):
            reversed_data.extend(mat.data[start:stop][::-1])
            reversed_indices.extend(mat.indices[start:stop][::-1])
        return sparse.csr_matrix(
            (reversed_data, reversed_indices, mat.indptr), shape=mat.shape)

    def make_svc():
        return svm.SVC(kernel='linear', probability=True, random_state=0)

    X, y = load_digits(return_X_y=True)
    X_test = sparse.csr_matrix(X[50:100])
    X, y = X[:50], y[:50]

    X_sparse = sparse.csr_matrix(X)
    coef_dense = make_svc().fit(X, y).coef_
    sparse_svc = make_svc().fit(X_sparse, y)
    coef_sorted = sparse_svc.coef_
    # make sure dense and sparse SVM give the same result
    assert_array_almost_equal(coef_dense, coef_sorted.toarray())

    X_sparse_unsorted = scramble_indices(X_sparse)
    X_test_unsorted = scramble_indices(X_test)

    assert not X_sparse_unsorted.has_sorted_indices
    assert not X_test_unsorted.has_sorted_indices

    unsorted_svc = make_svc().fit(X_sparse_unsorted, y)
    coef_unsorted = unsorted_svc.coef_
    # make sure unsorted indices give same result
    assert_array_almost_equal(coef_unsorted.toarray(), coef_sorted.toarray())
    assert_array_almost_equal(sparse_svc.predict_proba(X_test_unsorted),
                              sparse_svc.predict_proba(X_test))
Пример #26
0
def test_ovr_multilabel_predict_proba():
    """Check ``predict_proba`` availability and output for multilabel OvR.

    Runs with and without unlabeled samples in the generated data.
    """
    proba_base = MultinomialNB(alpha=1)
    for allow_unlabeled in (False, True):
        X, Y = datasets.make_multilabel_classification(
            n_samples=100,
            n_features=20,
            n_classes=5,
            n_labels=3,
            length=50,
            allow_unlabeled=allow_unlabeled,
            random_state=0)
        X_train, Y_train = X[:80], Y[:80]
        X_test = X[80:]
        proba_ovr = OneVsRestClassifier(proba_base).fit(X_train, Y_train)

        # Decision function only estimator.
        svr_ovr = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train)
        assert not hasattr(svr_ovr, 'predict_proba')

        # Estimator with predict_proba disabled, depending on parameters.
        svc_ovr = OneVsRestClassifier(svm.SVC(probability=False))
        assert not hasattr(svc_ovr, 'predict_proba')
        svc_ovr.fit(X_train, Y_train)
        assert not hasattr(svc_ovr, 'predict_proba')
        assert hasattr(svc_ovr, 'decision_function')

        # Estimator which can get predict_proba enabled after fitting
        search = GridSearchCV(svm.SVC(probability=False),
                              param_grid={'probability': [True]})
        search_ovr = OneVsRestClassifier(search)
        assert not hasattr(search_ovr, 'predict_proba')
        search_ovr.fit(X_train, Y_train)
        assert hasattr(search_ovr, 'predict_proba')

        predicted_labels = proba_ovr.predict(X_test)
        probabilities = proba_ovr.predict_proba(X_test)

        # predict assigns a label if the probability that the
        # sample has the label is greater than 0.5.
        thresholded = probabilities > .5
        assert_array_equal(thresholded, predicted_labels)
Пример #27
0
def test_tweak_params():
    """Make sure some tweaking of fitted parameters works.

    ``_dual_coef_`` is changed at run time and ``predict`` is expected to
    change accordingly.  This is not trivial since it involves a lot of
    C/Python copying in the libsvm bindings; success of the test ensures
    that the mapping between libsvm and the Python classifier is complete.
    """
    probe = [[-.1, -.1]]

    linear_svc = svm.SVC(kernel='linear', C=1.0)
    linear_svc.fit(X, Y)
    assert_array_equal(linear_svc.dual_coef_, [[-.25, .25]])
    assert_array_equal(linear_svc.predict(probe), [1])

    # overwrite the dual coefficients and predict again
    linear_svc._dual_coef_ = np.array([[.0, 1.]])
    assert_array_equal(linear_svc.predict(probe), [2])
Пример #28
0
def test_ovr_multilabel_decision_function():
    """Check that positive OvR decision values match multilabel ``predict``."""
    features, labels = datasets.make_multilabel_classification(
        n_samples=100,
        n_features=20,
        n_classes=5,
        n_labels=3,
        length=50,
        allow_unlabeled=True,
        random_state=0)
    # first 80 samples for training, the rest for testing
    train_features, train_labels = features[:80], labels[:80]
    test_features = features[80:]

    ovr = OneVsRestClassifier(svm.SVC()).fit(train_features, train_labels)
    assert_array_equal(
        (ovr.decision_function(test_features) > 0).astype(int),
        ovr.predict(test_features))
Пример #29
0
def test_svc():
    """Check that sparse SVC gives the same result as SVC"""
    # many class dataset:
    X_blobs, y_blobs = make_blobs(n_samples=100, centers=10, random_state=0)
    X_blobs = sparse.csr_matrix(X_blobs)

    # each case is [training data, training targets, test data]
    cases = [[X_sp, Y, T], [X2_sp, Y2, T2],
             [X_blobs[:80], y_blobs[:80], X_blobs[80:]],
             [iris.data, iris.target, iris.data]]
    kernel_names = ["linear", "poly", "rbf", "sigmoid"]

    for case in cases:
        for kernel_name in kernel_names:
            params = dict(gamma=1,
                          kernel=kernel_name,
                          probability=True,
                          random_state=0,
                          decision_function_shape='ovo')
            # two separate, identically configured estimators
            dense_svc = svm.SVC(**params)
            sparse_svc = svm.SVC(**params)
            check_svm_model_equal(dense_svc, sparse_svc, *case)
Пример #30
0
def test_weight():
    """Test the ``class_weight`` parameter on sparse generated data."""
    X_, y_ = make_classification(n_samples=200,
                                 n_features=100,
                                 weights=[0.833, 0.167],
                                 random_state=0)
    X_ = sparse.csr_matrix(X_)

    # first 180 samples for fitting, remaining ones for evaluation
    X_fit, y_fit = X_[:180], y_[:180]
    X_eval, y_eval = X_[180:], y_[180:]

    estimators = (linear_model.LogisticRegression(),
                  svm.LinearSVC(random_state=0), svm.SVC())
    for estimator in estimators:
        estimator.set_params(class_weight={0: 5})
        estimator.fit(X_fit, y_fit)
        predicted = estimator.predict(X_eval)
        assert np.sum(predicted == y_eval) >= 11