@pytest.mark.parametrize("nn, pp", [(1000, 10), (10, 100), (284, 50)])
@pytest.mark.parametrize("bb", [(1), (2), (1000)])
@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize("jit", [True, False])
def test_FracRidgeRegressorCV(nn, pp, bb, fit_intercept, jit):
    X, y, _, _ = make_data(nn, pp, bb, fit_intercept)
    fracs = np.arange(.1, 1.1, .1)
    FRCV = FracRidgeRegressorCV(fit_intercept=fit_intercept, jit=jit)
    FRCV.fit(X, y, frac_grid=fracs)
    # Refitting with the best fraction found by CV should reproduce the
    # cross-validated coefficients:
    FR = FracRidgeRegressor(fracs=FRCV.best_frac_,
                            fit_intercept=fit_intercept)
    FR.fit(X, y)
    assert np.allclose(FR.coef_, FRCV.coef_, atol=10e-3)
    # Standard ridge regression with the alpha that corresponds to the
    # best fraction should also match:
    RR = Ridge(alpha=FRCV.alpha_, fit_intercept=fit_intercept,
               solver='svd')
    RR.fit(X, y)
    # The coefficients in the sklearn object are transposed relative to
    # our conventions:
    assert np.allclose(RR.coef_.T, FRCV.coef_, atol=10e-3)
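# A minimal sketch of the frac -> alpha correspondence the test above
# relies on, using the functional ``fracridge`` API. The random data,
# shapes, and seed below are illustrative assumptions, not part of the
# test suite:

import numpy as np
from fracridge import fracridge

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 10))
y = rng.standard_normal((100, 2))
fracs = np.arange(.1, 1.1, .1)

# coef has shape (n_features, n_fracs, n_targets); alpha has shape
# (n_fracs, n_targets): one equivalent ridge penalty per fraction and
# target, which is what ``FRCV.alpha_`` exposes for the best fraction.
coef, alpha = fracridge(X, y, fracs=fracs)

# The norm of each solution, relative to the unregularized (frac=1)
# solution, should approximate the requested fraction:
achieved = (np.linalg.norm(coef, axis=0) /
            np.linalg.norm(coef[:, -1:, :], axis=0))
print(np.round(achieved, 2))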
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from fracridge import FracRidgeRegressorCV

n_targets = 15
n_features = 80
effective_rank = 20
X, y, coef_true = make_regression(n_samples=250,
                                  n_features=n_features,
                                  effective_rank=effective_rank,
                                  n_targets=n_targets,
                                  coef=True,
                                  noise=10)

##########################################################################
# Iterating over the number of PCA components, we generate design matrices
# with more and more dimensions in them. As the number of data dimensions
# grows, the best fraction for FracRidge decreases.

best_fracs = []
for n_components in range(2, X.shape[-1], 5):
    pca = PCA(n_components=n_components)
    frcv = FracRidgeRegressorCV()
    pipeline = Pipeline(steps=[('pca', pca), ('fracridgecv', frcv)])
    pipeline.fit(X, y)
    best_fracs.append(pipeline['fracridgecv'].best_frac_)

fig, ax = plt.subplots()
ax.plot(range(2, X.shape[-1], 5), best_fracs, 'o-')
ax.set_ylim([0, 1])
ax.set_ylabel("Best fraction")
ax.set_xlabel("Number of PCA components")
plt.show()
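##########################################################################
# The SRR/FRR comparison below assumes a held-out test set. A minimal
# sketch of the setup it relies on, reusing the ``make_regression`` data
# from above; the 80/20 split and random state are assumptions, not fixed
# by the original example:

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=0)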
import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.metrics import r2_score
from fracridge import FracRidgeRegressorCV

# We will start with SRR. We use a dense grid of alphas with 20
# log-spaced values -- a common heuristic used to ensure a wide sampling
# of alpha values.

n_alphas = 20
srr_alphas = np.logspace(-10, 10, n_alphas)
srr = RidgeCV(alphas=srr_alphas)
srr.fit(X_train, y_train)

##########################################################################
# We sample the same number of fractions for FRR, evenly distributed
# between 1/n_alphas and 1.

fracs = np.linspace(1 / n_alphas, 1, n_alphas)
frr = FracRidgeRegressorCV()
frr.fit(X_train, y_train, frac_grid=fracs)

##########################################################################
# Both models are fit and used to predict a left-out set. Performance
# of the models is compared using the :func:`sklearn.metrics.r2_score`
# function (coefficient of determination).

pred_frr = frr.predict(X_test)
pred_srr = srr.predict(X_test)

frr_r2 = r2_score(y_test, pred_frr)
srr_r2 = r2_score(y_test, pred_srr)

print(frr_r2)
print(srr_r2)
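##########################################################################
# Because ``y`` has multiple targets, the scores above average over
# targets. To see how each model fares per target, sklearn's
# ``multioutput='raw_values'`` option returns one R^2 per column (a
# small illustrative addition, not part of the original example):

frr_r2_per_target = r2_score(y_test, pred_frr, multioutput='raw_values')
srr_r2_per_target = r2_score(y_test, pred_srr, multioutput='raw_values')
print(frr_r2_per_target - srr_r2_per_target)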
from sklearn.utils.estimator_checks import check_estimator


def test_FracRidge_estimator():
    check_estimator(FracRidgeRegressor())
    check_estimator(FracRidgeRegressorCV())
import numpy as np
from fracridge import (fracridge, vec_len, FracRidgeRegressor,
                       FracRidgeRegressorCV)
from sklearn.linear_model import Ridge
import pytest
from sklearn.utils.estimator_checks import parametrize_with_checks


@parametrize_with_checks([FracRidgeRegressor(), FracRidgeRegressorCV()])
def test_sklearn_compatible_estimator(estimator, check):
    check(estimator)


def run_fracridge(X, y, fracs, jit):
    fracridge(X, y, fracs=fracs, jit=jit)


@pytest.mark.parametrize("nn, pp", [(1000, 10), (10, 100), (284, 50)])
@pytest.mark.parametrize("bb", [(1), (2), (1000)])
@pytest.mark.parametrize("jit", [True, False])
def test_benchmark_fracridge(nn, pp, bb, jit, benchmark):
    X, y, _, _ = make_data(nn, pp, bb)
    fracs = np.arange(.1, 1.1, .1)
    benchmark(run_fracridge, X, y, fracs, jit)


def make_data(nn, pp, bb, fit_intercept=False):
    np.random.seed(1)
    X = np.random.randn(nn, pp)
    y = np.random.randn(nn, bb).squeeze()
    if fit_intercept: