Example #1
from sliced import SlicedInverseRegression
from sliced import datasets


def test_n_directions_auto_heuristic():
    X, y = datasets.make_exponential(random_state=123)
    sir = SlicedInverseRegression(n_directions='auto').fit(X, y)
    assert sir.n_directions_ == 2

    X_sir = sir.transform(X)
    assert X_sir.shape == (500, 2)
Example #2
import numpy as np

from sliced import SlicedInverseRegression
from sliced import datasets


def test_regression():
    """NOTE: subsequent calls may flip the direction of eigenvectors
    (multiply by -1), so we can only compare absolute values.

    This was not a problem for svds; investigate whether we can get
    deterministic behavior back. A sign-alignment sketch follows this
    example.
    """
    X, y = datasets.make_cubic(random_state=123)

    for n_dir in range(1, X.shape[1]):
        sir = SlicedInverseRegression(n_directions=n_dir)

        # check that the shape is correct
        X_sir = sir.fit(X, y).transform(X)
        np.testing.assert_equal(X_sir.shape[1], n_dir)

        # should match fit_transform
        X_sir2 = sir.fit_transform(X, y)
        np.testing.assert_allclose(np.abs(X_sir), np.abs(X_sir2))

        # call transform again and check if things are okay
        X_sir = sir.transform(X)
        X_sir2 = sir.fit_transform(X, y)
        np.testing.assert_allclose(np.abs(X_sir), np.abs(X_sir2))

        # there is one true direction it should find
        true_beta = (1 / np.sqrt(2)) * np.hstack((np.ones(2), np.zeros(8)))
        angle = np.dot(true_beta, sir.directions_[0, :])
        np.testing.assert_allclose(np.abs(angle), 1, rtol=1e-1)
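
The sign flip mentioned in the docstring above can also be handled by aligning
signs explicitly before comparing, instead of taking absolute values. A minimal
sketch, assuming two fits of the same data agree up to sign; the align_signs
helper is illustrative and not part of sliced:

import numpy as np

from sliced import SlicedInverseRegression
from sliced import datasets


def align_signs(directions, reference):
    # Flip each direction so it points the same way as the matching reference row.
    signs = np.sign(np.sum(directions * reference, axis=1))
    signs[signs == 0] = 1  # leave exactly-orthogonal rows untouched
    return directions * signs[:, np.newaxis]


X, y = datasets.make_cubic(random_state=123)

directions_a = SlicedInverseRegression(n_directions=2).fit(X, y).directions_
directions_b = SlicedInverseRegression(n_directions=2).fit(X, y).directions_

# After sign alignment the two fits can be compared directly.
np.testing.assert_allclose(align_signs(directions_a, directions_b),
                           directions_b, atol=1e-8)
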
Example #3
import numpy as np

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from sliced import SlicedInverseRegression


def test_classification():
    """SIR is LDA for classification, so let's test some predictions.

    A direction-comparison sketch follows this example.
    """
    # Data is just 6 separable points in the plane
    X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]],
                 dtype=np.float64)
    y = np.array([1, 1, 1, 0, 0, 0])

    sir = SlicedInverseRegression(n_directions=1, n_slices=2).fit(X, y)
    lda = LinearDiscriminantAnalysis(solver='eigen').fit(X, y)

    y_pred = sir.transform(X) > 0
    np.testing.assert_equal(y, y_pred.ravel())
    np.testing.assert_equal(lda.predict(X), y_pred.ravel())
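
Since the test asserts that SIR and LDA make identical predictions, the fitted
directions themselves should be parallel. A minimal sketch of that comparison
on the same toy data, assuming lda.scalings_[:, 0] holds the leading
eigen-solver discriminant direction:

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from sliced import SlicedInverseRegression

X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]],
             dtype=np.float64)
y = np.array([1, 1, 1, 0, 0, 0])

sir = SlicedInverseRegression(n_directions=1, n_slices=2).fit(X, y)
lda = LinearDiscriminantAnalysis(solver='eigen').fit(X, y)

# Normalize both directions; parallel vectors have absolute cosine 1.
sir_dir = sir.directions_[0] / np.linalg.norm(sir.directions_[0])
lda_dir = lda.scalings_[:, 0] / np.linalg.norm(lda.scalings_[:, 0])
np.testing.assert_allclose(np.abs(np.dot(sir_dir, lda_dir)), 1, rtol=1e-2)
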
"""
=======================
Binary Targets with SIR
=======================

Sliced Inverse Regression is able to find a one-dimensional subspace
that seperates cases in the famous breast cancer dataset.
"""
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sliced import SlicedInverseRegression

X, y = load_breast_cancer(return_X_y=True)

sir = SlicedInverseRegression(n_directions=2).fit(X, y)
X_sir = sir.transform(X)

plt.scatter(X_sir[:, 0], X_sir[:, 1], c=y, alpha=0.8, edgecolor='k')
plt.xlabel("$X\hat{\\beta}_{1}$")
plt.ylabel("$X\hat{\\beta}_{2}$")
plt.title("Breast Cancer Data")

plt.show()
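
To illustrate the one-dimensional claim in the docstring, the sketch below
keeps only the first SIR direction and plots per-class histograms; the
histogram styling is an illustrative choice, and classes 0/1 are
malignant/benign as in scikit-learn's dataset.

import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sliced import SlicedInverseRegression

X, y = load_breast_cancer(return_X_y=True)

# Project onto the single leading SIR direction.
X_sir1 = SlicedInverseRegression(n_directions=1).fit(X, y).transform(X)

# Per-class histograms of the 1-d projection; the two groups should be
# largely separated along this single direction.
plt.hist(X_sir1[y == 0, 0], bins=30, alpha=0.6, label="malignant (y=0)")
plt.hist(X_sir1[y == 1, 0], bins=30, alpha=0.6, label="benign (y=1)")
plt.xlabel(r"$X\hat{\beta}_{1}$")
plt.legend()
plt.title("First SIR Direction, Breast Cancer Data")

plt.show()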