Exemplo n.º 1
def test_errors_alpha_out_of_bounds():
    """Fitting with ``alpha=10`` is expected to raise a ``ValueError``."""
    features, target = datasets.make_cubic(random_state=123)
    estimator = SlicedInverseRegression(alpha=10)

    # construction happens outside the context manager, so the error has
    # to come from the call to fit
    with pytest.raises(ValueError):
        estimator.fit(features, target)
Exemplo n.º 2
def test_n_directions_auto_heuristic():
    """With ``n_directions='auto'`` the fit should settle on 2 directions."""
    features, target = datasets.make_exponential(random_state=123)
    estimator = SlicedInverseRegression(n_directions='auto').fit(
        features, target)

    assert estimator.n_directions_ == 2

    # expected shape of the projection: 500 rows by 2 columns
    projected = estimator.transform(features)
    assert projected.shape == (500, 2)
Exemplo n.º 3
def sir_pre(data_train_center, data_test_center, y_label):
    """Project train and test data onto directions fitted by SIR.

    Columns whose standard deviation over the training rows is zero are
    dropped from both matrices before fitting and projecting.

    Parameters
    ----------
    data_train_center : ndarray, 2-D
        Training matrix, indexed as ``[rows, columns]``. Presumably already
        centered (going by the name) -- this function does not center it.
    data_test_center : ndarray, 2-D
        Test matrix; it is indexed with the column ids selected from the
        training matrix, so it must share the training column layout.
    y_label : array-like
        Targets handed to ``SlicedInverseRegression.fit``.

    Returns
    -------
    data_sir_fit, data_sir_test : ndarray
        The column-filtered training and test matrices multiplied by the
        transpose of the fitted ``directions_``.
    """
    sir = SlicedInverseRegression(n_directions=9, n_slices=10)

    # The column filter is derived from the training data only and then
    # reused for the test data, so both projections use the same features.
    nonzero_id = np.where(np.std(data_train_center, 0) != 0)[0]
    data_train_center_shrink = data_train_center[:, nonzero_id]
    data_test_center_shrink = data_test_center[:, nonzero_id]

    sir.fit(data_train_center_shrink, y_label)

    # Transpose once and apply the same projection to both data sets.
    projection = np.transpose(sir.directions_)
    data_sir_fit = data_train_center_shrink @ projection
    data_sir_test = data_test_center_shrink @ projection

    return data_sir_fit, data_sir_test
Exemplo n.º 4
def test_all_zero_coefficients_warns_and_does_not_zero_out():
    """Check that fit warns and still leaves non-zero directions.

    To avoid errors, a t-test that indicates that all coefficients are
    zero will not zero-out the directions vector.
    """
    X, y = load_breast_cancer(return_X_y=True)
    sir = SlicedInverseRegression(n_directions=2, alpha=0.05)

    # fitting is expected to emit a RuntimeWarning ...
    with pytest.warns(RuntimeWarning):
        sir.fit(X, y)

    # ... and at least one coefficient must remain non-zero afterwards
    assert np.any(sir.directions_ != 0)
Exemplo n.º 5
def test_classification():
    """SIR is LDA for classification so lets test some predictions."""
    # Data is just 6 separable points in the plane. The dtype is given
    # explicitly so that no integer-typed input reaches the estimators.
    X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]],
                 dtype=np.float64)
    y = np.array([1, 1, 1, 0, 0, 0])

    sir = SlicedInverseRegression(n_directions=1, n_slices=2).fit(X, y)
    lda = LinearDiscriminantAnalysis(solver='eigen').fit(X, y)

    # thresholding the single SIR component at zero gives a class prediction
    y_pred = sir.transform(X) > 0
    np.testing.assert_equal(y, y_pred.ravel())
    np.testing.assert_equal(lda.predict(X), y_pred.ravel())
Exemplo n.º 6
def test_matches_athletes():
    """Test that the results match the R dr package on the ais dataset."""
    X, y = datasets.load_athletes()
    sir = SlicedInverseRegression(n_directions=4, n_slices=11).fit(X, y)

    # NOTE(review): assumes the fitted estimator exposes its eigenvalues as
    # ``eigenvalues_`` -- confirm against the SlicedInverseRegression API.
    np.testing.assert_allclose(
        sir.eigenvalues_,
        np.array([0.957661631, 0.245041613, 0.107075941, 0.090413047]))

    # 4 x 8 reference matrix; presumably one row per direction, since it is
    # compared against ``directions_`` of a fit with n_directions=4.
    expected_directions = np.array([
        [1.50963358e-01, -9.16480522e-01, -1.31538894e-01, -9.33588596e-02,
         4.46783829e-03, -1.88973540e-01, 2.74758965e-01, -5.63123794e-03],
        [-5.01785457e-02, -1.94229862e-01, 6.85475076e-01, -4.33408964e-02,
         1.83380846e-04, 3.47565293e-01, -6.05830142e-01, 1.30588502e-02],
        [1.08983356e-01, -2.01236965e-01, 7.19975455e-01, 4.64453982e-01,
         4.49759016e-02, 2.94969081e-01, -3.41966152e-01, -8.70270913e-02],
        [-2.21020634e-03, -8.97220257e-02, -6.63097774e-01, 2.90838658e-01,
         7.19045566e-02, 3.70563626e-02, 6.78877114e-01, 1.55472144e-02],
    ])

    np.testing.assert_allclose(sir.directions_, expected_directions)
Exemplo n.º 7
def test_single_y_value():
    """A target holding one repeated value should make fit raise."""
    generator = np.random.RandomState(123)
    features = generator.randn(100, 4)
    constant_target = np.ones(100)

    with pytest.raises(ValueError):
        SlicedInverseRegression().fit(features, constant_target)
Exemplo n.º 8
def test_n_slices_too_big():
    """Requesting 10 slices for a two-valued target should end up with 2."""
    # The dtype is given explicitly so that no integer-typed input reaches
    # the estimator.
    X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]],
                 dtype=np.float64)
    y = np.array([1, 1, 1, 0, 0, 0])

    sir = SlicedInverseRegression(n_directions=1, n_slices=10).fit(X, y)

    # y has only two distinct values, and two slices are expected after fit
    assert sir.n_slices_ == 2
Exemplo n.º 9
def test_sparse_coefficient():
    """Test the component-wise t-test works on a simple synthetic dataset."""
    rng = np.random.RandomState(123)

    n_samples = 300
    n_features = 6
    # NOTE: X must be drawn before the noise so the random stream, and with
    # it the generated data, stays the same.
    X = rng.randn(n_samples, n_features)
    noise = rng.randn(n_samples).reshape(-1, 1)

    # only features 0 and 3 enter the response
    beta = np.array([1, 0, 0, -1, 0, 0]).reshape(-1, 1)
    y = np.exp(-0.75 * np.dot(X, beta)) + 0.5 * noise

    sir = SlicedInverseRegression(alpha=0.05)
    sir.fit(X, y.ravel())

    # the non-zero pattern of the fitted directions must match that of beta
    np.testing.assert_array_equal(sir.directions_.ravel() != 0,
                                  beta.ravel() != 0)
Exemplo n.º 10
def test_regression():
    """Check shapes, fit/transform consistency and the leading direction.

    NOTE: subsequent calls may flip the direction of eigenvectors
        (multiply by -1), so we can only compare absolute values.

        This was not a problem for svds.. investigate if we can get
        deterministic behavior back.
    """
    X, y = datasets.make_cubic(random_state=123)

    # Reference direction used for the check at the end of each iteration;
    # it does not depend on the loop variable, so it is built once.
    true_beta = (1 / np.sqrt(2)) * np.hstack((np.ones(2), np.zeros(8)))

    for n_dir in range(1, X.shape[1]):
        sir = SlicedInverseRegression(n_directions=n_dir)

        # check that the shape is correct
        X_sir = sir.fit(X, y).transform(X)
        np.testing.assert_equal(X_sir.shape[1], n_dir)

        # should match fit_transform
        X_sir2 = sir.fit_transform(X, y)
        np.testing.assert_allclose(np.abs(X_sir), np.abs(X_sir2))

        # call transform again and check if things are okay
        X_sir = sir.transform(X)
        X_sir2 = sir.fit_transform(X, y)
        np.testing.assert_allclose(np.abs(X_sir), np.abs(X_sir2))

        # there is one true direction it should find: the dot product with
        # the first fitted direction must be close to 1 in absolute value
        angle = np.dot(true_beta, sir.directions_[0, :])
        np.testing.assert_allclose(np.abs(angle), 1, rtol=1e-1)
Exemplo n.º 11
def test_sparse_coefficient_multiple_dimensions():
    """Perform the t-test on a dataset with two directions."""
    rng = np.random.RandomState(123)

    n_samples = 300
    n_features = 15

    # two directions with disjoint support: features 0-8 and features 9-14
    beta = np.zeros((2, n_features))
    beta[0, :9] = 1
    beta[1, 9:] = 1
    X = rng.randn(n_samples, n_features)
    y = (np.sign(np.dot(X, beta[0, :])) *
         np.log(np.abs(np.dot(X, beta[1, :]) + 5)))

    sir = SlicedInverseRegression(n_directions=2, alpha=0.05)
    sir.fit(X, y.ravel())

    # the first dimension is found without error
    # (row 1 of the fitted directions is the one compared against beta[0])
    np.testing.assert_array_equal(sir.directions_[1, :].ravel() != 0,
                                  beta[0, :].ravel() != 0)

    # the second dimension picks up a few spurious coefficients, so only
    # the entries from index 9 onwards are required to be non-zero
    assert np.all(sir.directions_[0, :].ravel()[9:] != 0)
Exemplo n.º 12
 def __init__(self):
     """Store a default-constructed ``SlicedInverseRegression``."""
     default_estimator = SlicedInverseRegression()
     self.regressor = default_estimator
Exemplo n.º 13

"""
A comparison of the subspace found by sliced inverse regression and
principal component analysis on the Australian athletes dataset.
"""
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA

from sliced.datasets import load_athletes
from sliced import SlicedInverseRegression

X, y = load_athletes()

# fit both models on the same data, then project the data with each
sir = SlicedInverseRegression(n_slices=11).fit(X, y)
pca = PCA(random_state=123).fit(X, y)
X_sir = sir.transform(X)
X_pca = pca.transform(X)

# two side-by-side panels that share the y axis
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

# identical marker styling for both panels; colour encodes the response
marker_style = dict(c=y, cmap='viridis', linewidth=0.5, edgecolor='k')

# each panel plots the first projected component against the response
panels = ((ax1, X_sir, 'SIR Subspace'), (ax2, X_pca, 'PCA Subspace'))
for axis, projection, title in panels:
    axis.scatter(projection[:, 0], y, **marker_style)
    axis.set_title(title)

ax1.set_ylabel("Lean Body Mass (kg)")
Exemplo n.º 14
def test_sparse_not_supported():
    """Fitting on a CSR sparse matrix is expected to raise ``TypeError``."""
    dense_features, target = datasets.make_cubic(random_state=123)
    sparse_features = sparse.csr_matrix(dense_features)

    with pytest.raises(TypeError):
        SlicedInverseRegression().fit(sparse_features, target)
Exemplo n.º 15
"""
=========================
Sliced Inverse Regression
=========================

An example plot of :class:`sliced.sir.SlicedInverseRegression`
"""
import numpy as np
import matplotlib.pyplot as plt

from sliced import SlicedInverseRegression
from sliced import datasets

X, y = datasets.make_cubic(random_state=123)

sir = SlicedInverseRegression()
X_sir = sir.fit_transform(X, y)

# estimate of the first dimension reducing direction
beta1_hat = sir.directions_[0, :]

# plot data projected onto the first direction
plt.scatter(X_sir[:, 0], y, c=y, cmap='viridis', linewidth=0.5, edgecolor='k')

# annotation showing the reference value for beta_1
beta_text = "$\\beta_1$ = " + "{0}".format([0.707, 0.707])
plt.annotate(beta_text, xy=(-2, 6.5))

# annotation showing the direction found (first two coefficients, rounded).
# The backslash in "\\hat" is escaped explicitly: a bare "\h" is an invalid
# escape sequence in a non-raw string literal.
beta1_hat_text = "$\\hat{\\beta_1}$ = " + "{0}".format(
    np.round(beta1_hat, 3).tolist()[:2])
# NOTE(review): the position is chosen to sit just below the first
# annotation -- adjust if the labels overlap.
plt.annotate(beta1_hat_text, xy=(-2, 5))
Exemplo n.º 16
def test_n_directions_none():
    """``n_directions=None`` should give as many directions as columns."""
    features, target = datasets.make_cubic(random_state=123)
    fitted = SlicedInverseRegression(n_directions=None).fit(features, target)

    n_columns = features.shape[1]
    np.testing.assert_equal(fitted.n_directions_, n_columns)
Exemplo n.º 17
"""
=======================
Binary Targets with SIR
=======================

Sliced Inverse Regression is able to find a one-dimensional subspace
that separates cases in the famous breast cancer dataset.
"""
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sliced import SlicedInverseRegression

# load the features and labels
X, y = load_breast_cancer(return_X_y=True)

# fit SIR with two directions and project the data onto them
sir = SlicedInverseRegression(n_directions=2).fit(X, y)
X_sir = sir.transform(X)
first_component, second_component = X_sir[:, 0], X_sir[:, 1]

# scatter the two components against each other, coloured by label
plt.scatter(first_component, second_component, c=y, alpha=0.8, edgecolor='k')
plt.title("Breast Cancer Data")

Exemplo n.º 18
def test_zero_variance_features():
    """Fitting the digits data is expected to raise ``LinAlgError``.

    Going by the test name, the digits data contains zero-variance features.
    """
    X, y = load_digits(return_X_y=True)
    sir = SlicedInverseRegression(n_directions='auto')

    # only the fit call sits inside the context manager, and its return
    # value is not bound because the call is expected to raise
    with pytest.raises(linalg.LinAlgError):
        sir.fit(X, y)