def test_n_directions_auto_heuristic():
    """Verify the number of directions chosen when ``n_directions='auto'``.

    Fits SIR on the exponential dataset and checks both the fitted
    ``n_directions_`` attribute and the shape of the transformed data.
    """
    features, target = datasets.make_exponential(random_state=123)

    estimator = SlicedInverseRegression(n_directions='auto')
    fitted = estimator.fit(features, target)
    assert fitted.n_directions_ == 2

    projected = fitted.transform(features)
    assert projected.shape == (500, 2)
def test_regression():
    """Check fit/transform consistency and direction recovery on cubic data.

    For every number of directions from 1 up to (but excluding) the number
    of features, this verifies that:

    * the transformed data has ``n_dir`` columns,
    * ``fit(...).transform(...)`` agrees with ``fit_transform(...)``,
    * the first fitted direction is aligned with the expected direction.

    NOTE: subsequent calls may flip the direction of the eigenvectors
    (multiply by -1), so we can only compare absolute values.

    This was not a problem for svds... investigate if we can get
    deterministic behavior back.
    """
    X, y = datasets.make_cubic(random_state=123)
    n_features = X.shape[1]

    # The expected direction does not depend on ``n_dir``, so build it once:
    # equal weight on the first two features, zero on the rest, scaled by
    # 1 / sqrt(2) so the vector has unit length.
    true_beta = (1 / np.sqrt(2)) * np.hstack(
        (np.ones(2), np.zeros(n_features - 2)))

    for n_dir in range(1, n_features):
        sir = SlicedInverseRegression(n_directions=n_dir)

        # check the shape is correct
        X_sir = sir.fit(X, y).transform(X)
        np.testing.assert_equal(X_sir.shape[1], n_dir)

        # should match fit_transform
        X_sir2 = sir.fit_transform(X, y)
        np.testing.assert_allclose(np.abs(X_sir), np.abs(X_sir2))

        # call transform again and check if things are okay
        X_sir = sir.transform(X)
        X_sir2 = sir.fit_transform(X, y)
        np.testing.assert_allclose(np.abs(X_sir), np.abs(X_sir2))

        # there is one true direction it should find; the sign is
        # arbitrary, so only the magnitude of the dot product is compared
        angle = np.dot(true_beta, sir.directions_[0, :])
        np.testing.assert_allclose(np.abs(angle), 1, rtol=1e-1)
def test_classification():
    """Compare thresholded SIR projections with the labels and with LDA.

    SIR is LDA for classification, so the sign of the single SIR component
    is used as the predicted class.
    """
    # Six separable points in the plane, three per class.
    points = np.array(
        [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]],
        dtype=np.float64,
    )
    labels = np.array([1, 1, 1, 0, 0, 0])

    sir_model = SlicedInverseRegression(n_directions=1, n_slices=2)
    sir_model = sir_model.fit(points, labels)
    lda_model = LinearDiscriminantAnalysis(solver='eigen').fit(points, labels)

    # Threshold the projection at zero and flatten it to a 1-d prediction.
    sir_labels = (sir_model.transform(points) > 0).ravel()

    np.testing.assert_equal(labels, sir_labels)
    np.testing.assert_equal(lda_model.predict(points), sir_labels)
"""
=======================
Binary Targets with SIR
=======================

Sliced Inverse Regression is able to find a one-dimensional subspace that
separates cases in the famous breast cancer dataset.
"""
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sliced import SlicedInverseRegression


X, y = load_breast_cancer(return_X_y=True)

# Fit SIR with two directions and project the data onto them.
sir = SlicedInverseRegression(n_directions=2).fit(X, y)
X_sir = sir.transform(X)

# Plot the first component against the second, coloured by the target.
plt.scatter(X_sir[:, 0], X_sir[:, 1], c=y, alpha=0.8, edgecolor='k')

# Raw strings keep the LaTeX backslashes literal; a plain string would
# contain the invalid escape sequence ``\h``.
plt.xlabel(r"$X\hat{\beta}_{1}$")
plt.ylabel(r"$X\hat{\beta}_{2}$")
plt.title("Breast Cancer Data")
plt.show()