# Imports assumed by the snippets below (skfda 0.5-era layout; adjust
# module paths for newer releases).
import warnings

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

from skfda.misc.operators import LinearDifferentialOperator
from skfda.misc.regularization import (L2Regularization,
                                       TikhonovRegularization)
from skfda.ml.regression import LinearRegression
from skfda.preprocessing.dim_reduction.projection import FPCA
from skfda.representation.basis import (BSpline, FDataBasis, Fourier,
                                        Monomial, VectorValued)
from skfda.representation.grid import FDataGrid
def test_regression_mixed(self):

    multivariate = np.array([[0, 0], [2, 7], [1, 7], [3, 9],
                             [4, 16], [2, 14], [3, 5]])

    X = [multivariate,
         FDataBasis(Monomial(n_basis=3),
                    [[1, 0, 0], [0, 1, 0], [0, 0, 1],
                     [1, 0, 1], [1, 0, 0], [0, 1, 0],
                     [0, 0, 1]])]

    # y = 2 + sum([3, 1] * array) + int(3 * function)
    intercept = 2
    coefs_multivariate = np.array([3, 1])
    coefs_functions = FDataBasis(Monomial(n_basis=3), [[3, 0, 0]])
    y_integral = np.array([3, 3 / 2, 1, 4, 3, 3 / 2, 1])
    y_sum = multivariate @ coefs_multivariate
    y = 2 + y_sum + y_integral

    scalar = LinearRegression()
    scalar.fit(X, y)

    np.testing.assert_allclose(scalar.intercept_, intercept, atol=0.01)
    np.testing.assert_allclose(scalar.coef_[0], coefs_multivariate,
                               atol=0.01)
    np.testing.assert_allclose(scalar.coef_[1].coefficients,
                               coefs_functions.coefficients, atol=0.01)

    y_pred = scalar.predict(X)
    np.testing.assert_allclose(y_pred, y, atol=0.01)
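# A standalone sanity check, not part of the original test suite, of the
# hand-computed y_integral fixture used above: each functional covariate
# x_i(t) = c0 + c1*t + c2*t^2 is written in the Monomial basis on [0, 1],
# so its contribution is int_0^1 3 x_i(t) dt = 3 (c0 + c1/2 + c2/3).
def check_y_integral():
    coefs = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 1],
                      [1, 0, 0], [0, 1, 0], [0, 0, 1]])
    # Integrals of the monomial basis functions 1, t, t^2 over [0, 1].
    basis_integrals = np.array([1, 1 / 2, 1 / 3])
    y_integral = 3 * coefs @ basis_integrals
    np.testing.assert_allclose(y_integral,
                               [3, 3 / 2, 1, 4, 3, 3 / 2, 1])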
def test_regression_single_explanatory(self):
    x_basis = Monomial(n_basis=7)
    x_fd = FDataBasis(x_basis, np.identity(7))

    beta_basis = Fourier(n_basis=5)
    beta_fd = FDataBasis(beta_basis, [1, 1, 1, 1, 1])
    y = [0.9999999999999993,
         0.162381381441085,
         0.08527083481359901,
         0.08519946930844623,
         0.09532291032042489,
         0.10550022969639987,
         0.11382675064746171]

    scalar = LinearRegression(coef_basis=[beta_basis])
    scalar.fit(x_fd, y)
    np.testing.assert_allclose(scalar.coef_[0].coefficients,
                               beta_fd.coefficients)
    np.testing.assert_allclose(scalar.intercept_, 0.0, atol=1e-6)

    y_pred = scalar.predict(x_fd)
    np.testing.assert_allclose(y_pred, y)

    scalar = LinearRegression(coef_basis=[beta_basis],
                              fit_intercept=False)
    scalar.fit(x_fd, y)
    np.testing.assert_allclose(scalar.coef_[0].coefficients,
                               beta_fd.coefficients)
    np.testing.assert_equal(scalar.intercept_, 0.0)

    y_pred = scalar.predict(x_fd)
    np.testing.assert_allclose(y_pred, y)
def test_error_beta_not_basis(self):
    """Test that all beta are Basis objects."""
    x_fd = FDataBasis(Monomial(n_basis=7), np.identity(7))
    y = [1 for _ in range(7)]
    beta = FDataBasis(Monomial(n_basis=7), np.identity(7))

    scalar = LinearRegression(coef_basis=[beta])
    with np.testing.assert_raises(TypeError):
        scalar.fit([x_fd], y)
def test_error_y_is_FData(self):
    """Test that the response variable is not an FData object."""
    x_fd = FDataBasis(Monomial(n_basis=7), np.identity(7))
    y = list(FDataBasis(Monomial(n_basis=7), np.identity(7)))

    scalar = LinearRegression(coef_basis=[Fourier(n_basis=5)])
    with np.testing.assert_raises(ValueError):
        scalar.fit([x_fd], y)
def test_error_weights_negative(self):
    """Test that none of the weights are negative."""
    x_fd = FDataBasis(Monomial(n_basis=7), np.identity(7))
    y = [1 for _ in range(7)]
    weights = [-1 for _ in range(7)]
    beta = Monomial(n_basis=7)

    scalar = LinearRegression(coef_basis=[beta])
    with np.testing.assert_raises(ValueError):
        scalar.fit([x_fd], y, weights)
def test_error_X_not_FData(self):
    """Test that a UserWarning is emitted when none of the explanatory
    variables is an FData object.
    """
    x_fd = np.identity(7)
    y = np.zeros(7)

    scalar = LinearRegression(coef_basis=[Fourier(n_basis=5)])
    with np.testing.assert_warns(UserWarning):
        scalar.fit([x_fd], y)
def test_error_weights_length(self):
    """Test that the number of weights equals the number of samples."""
    x_fd = FDataBasis(Monomial(n_basis=7), np.identity(7))
    y = [1 for _ in range(7)]
    weights = [1 for _ in range(8)]
    beta = Monomial(n_basis=7)

    scalar = LinearRegression(coef_basis=[beta])
    with np.testing.assert_raises(ValueError):
        scalar.fit([x_fd], y, weights)
def test_multivariate(self):

    def ignore_scalar_warning():
        warnings.filterwarnings(
            "ignore", category=UserWarning,
            message="All the covariates are scalar.")

    X, y = make_regression(n_samples=20, n_features=10,
                           random_state=1, bias=3.5)
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=2)

    for regularization_parameter in [0, 1, 10, 100]:

        with self.subTest(
                regularization_parameter=regularization_parameter):

            sklearn_l2 = Ridge(alpha=regularization_parameter)
            skfda_l2 = LinearRegression(
                regularization=L2Regularization(
                    regularization_parameter=regularization_parameter),
            )

            sklearn_l2.fit(X_train, y_train)
            with warnings.catch_warnings():
                ignore_scalar_warning()
                skfda_l2.fit(X_train, y_train)

            sklearn_y_pred = sklearn_l2.predict(X_test)
            with warnings.catch_warnings():
                ignore_scalar_warning()
                skfda_y_pred = skfda_l2.predict(X_test)

            np.testing.assert_allclose(
                sklearn_l2.coef_, skfda_l2.coef_[0])

            np.testing.assert_allclose(
                sklearn_l2.intercept_, skfda_l2.intercept_)

            np.testing.assert_allclose(sklearn_y_pred, skfda_y_pred)
def test_regression_mixed_regularization(self):

    multivariate = np.array([[0, 0], [2, 7], [1, 7], [3, 9],
                             [4, 16], [2, 14], [3, 5]])

    X = [multivariate,
         FDataBasis(Monomial(n_basis=3),
                    [[1, 0, 0], [0, 1, 0], [0, 0, 1],
                     [1, 0, 1], [1, 0, 0], [0, 1, 0],
                     [0, 0, 1]])]

    # y = 2 + sum([3, 1] * array) + int(3 * function)
    intercept = 2
    coefs_multivariate = np.array([3, 1])
    y_integral = np.array([3, 3 / 2, 1, 4, 3, 3 / 2, 1])
    y_sum = multivariate @ coefs_multivariate
    y = 2 + y_sum + y_integral

    scalar = LinearRegression(
        regularization=[
            TikhonovRegularization(lambda x: x),
            TikhonovRegularization(LinearDifferentialOperator(2)),
        ])
    scalar.fit(X, y)

    np.testing.assert_allclose(scalar.intercept_, intercept, atol=0.01)
    np.testing.assert_allclose(
        scalar.coef_[0], [2.536739, 1.072186], atol=0.01)
    np.testing.assert_allclose(
        scalar.coef_[1].coefficients,
        [[2.125676, 2.450782, 5.808745e-4]], atol=0.01)

    y_pred = scalar.predict(X)
    np.testing.assert_allclose(
        y_pred,
        [5.349035, 16.456464, 13.361185, 23.930295,
         32.650965, 23.961766, 16.29029],
        atol=0.01)
def test_regression_multiple_explanatory(self):
    y = [1, 2, 3, 4, 5, 6, 7]

    X = FDataBasis(Monomial(n_basis=7), np.identity(7))

    beta1 = BSpline(domain_range=(0, 1), n_basis=5)

    scalar = LinearRegression(coef_basis=[beta1])
    scalar.fit(X, y)

    np.testing.assert_allclose(scalar.intercept_.round(4),
                               np.array([32.65]), rtol=1e-3)

    np.testing.assert_allclose(
        scalar.coef_[0].coefficients.round(4),
        np.array([[-28.6443, 80.3996, -188.587, 236.5832, -481.3449]]),
        rtol=1e-3)

    y_pred = scalar.predict(X)
    np.testing.assert_allclose(y_pred, y, atol=0.01)
def test_error_X_beta_len_distinct(self):
    """Test that an error is raised when the number of coefficient
    bases and the number of explanatory variables differ.
    """
    x_fd = FDataBasis(Monomial(n_basis=7), np.identity(7))
    y = [1 for _ in range(7)]
    beta = Fourier(n_basis=5)

    scalar = LinearRegression(coef_basis=[beta])
    with np.testing.assert_raises(ValueError):
        scalar.fit([x_fd, x_fd], y)

    scalar = LinearRegression(coef_basis=[beta, beta])
    with np.testing.assert_raises(ValueError):
        scalar.fit([x_fd], y)
def test_error_y_X_samples_different(self):
    """Test that an error is raised when the number of response
    samples and the number of explanatory samples differ.
    """
    x_fd = FDataBasis(Monomial(n_basis=7), np.identity(7))
    y = [1 for _ in range(8)]
    beta = Fourier(n_basis=5)

    scalar = LinearRegression(coef_basis=[beta])
    with np.testing.assert_raises(ValueError):
        scalar.fit([x_fd], y)

    x_fd = FDataBasis(Monomial(n_basis=8), np.identity(8))
    y = [1 for _ in range(7)]
    beta = Fourier(n_basis=5)

    scalar = LinearRegression(coef_basis=[beta])
    with np.testing.assert_raises(ValueError):
        scalar.fit([x_fd], y)
class BasisRegression(object):
    """Functional linear model with basis functions for vector-valued
    covariates.

    Parameters
    ----------
    nbasis: int
        Number of basis functions.

    basis_type: str, default='bspline'
        Type of basis used; possible values are 'bspline', 'fourier'
        and 'fPCA'.

    Attributes
    ----------
    reg: object
        Instance of skfda.ml.regression.LinearRegression.

    coef: array, default=None
        Regression coefficients.

    fpca_basis: object
        If basis_type='fPCA', instance of
        skfda.preprocessing.dim_reduction.projection.FPCA.
    """

    def __init__(self, nbasis, basis_type='bspline'):
        self.nbasis = nbasis
        self.reg = LinearRegression()
        self.basis_type = basis_type
        self.coef = None
        if self.basis_type == 'fPCA':
            self.fpca_basis = FPCA(self.nbasis)

    def data_to_basis(self, X, fit_fPCA=True):
        """Project the data onto the basis functions.

        Parameters
        ----------
        X: array, shape (n, n_points, d)
            Array of paths. It is a 3-dimensional array containing the
            coordinates in R^d of n piecewise linear paths, each
            composed of n_points.

        fit_fPCA: boolean, default=True
            If basis_type='fPCA' and fit_fPCA=True, the basis
            functions are fitted to be the functional principal
            components of X.

        Returns
        -------
        fd_basis: object
            Instance of skfda.representation.basis.FDataBasis, the
            basis representation of X, where the type of basis is
            determined by self.basis_type.
        """
        grid_points = np.linspace(0, 1, X.shape[1])
        fd = FDataGrid(X, grid_points)

        # One basis per coordinate of the d-dimensional paths.
        basis_vec = []
        for _ in range(X.shape[2]):
            if self.basis_type == 'bspline':
                basis_vec.append(BSpline(n_basis=self.nbasis))
            elif self.basis_type == 'fourier':
                basis_vec.append(Fourier(n_basis=self.nbasis))
            elif self.basis_type == 'fPCA':
                # Pre-smooth with a fixed B-spline basis before
                # extracting the functional principal components.
                basis_vec.append(BSpline(n_basis=7))

        basis = VectorValued(basis_vec)
        fd_basis = fd.to_basis(basis)

        if self.basis_type == 'fPCA':
            if fit_fPCA:
                self.fpca_basis = self.fpca_basis.fit(fd_basis)
            fd_basis = self.fpca_basis.transform(fd_basis)

        return fd_basis

    def fit(self, X, Y):
        """Fit the functional linear model to X and Y.

        Parameters
        ----------
        X: array, shape (n, n_points, d)
            Array of training paths. It is a 3-dimensional array
            containing the coordinates in R^d of n piecewise linear
            paths, each composed of n_points.

        Y: array, shape (n)
            Array of target values.

        Returns
        -------
        reg: object
            Instance of skfda.ml.regression.LinearRegression.
        """
        fd_basis = self.data_to_basis(X)
        self.reg.fit(fd_basis, Y)
        self.coef = self.reg.coef_
        return self.reg

    def predict(self, X):
        """Predict the output of self.reg for X.

        Parameters
        ----------
        X: array, shape (n, n_points, d)
            Array of paths. It is a 3-dimensional array containing the
            coordinates in R^d of n piecewise linear paths, each
            composed of n_points.

        Returns
        -------
        Ypred: array, shape (n)
            Array of predicted values.
        """
        # The fPCA basis must not be refitted at prediction time.
        fd_basis = self.data_to_basis(X, fit_fPCA=False)
        return self.reg.predict(fd_basis)

    def get_loss(self, X, Y, plot=False):
        """Compute the empirical squared loss of the functional linear
        model.

        Parameters
        ----------
        X: array, shape (n, n_points, d)
            Array of paths. It is a 3-dimensional array containing the
            coordinates in R^d of n piecewise linear paths, each
            composed of n_points.

        Y: array, shape (n)
            Array of target values.

        plot: boolean, default=False
            If True, plot the target values Y against the predicted
            values Ypred to assess the quality of the fit.

        Returns
        -------
        hatL: float
            The mean squared error, that is, the mean of the squares
            of Y - Ypred, where Ypred are the values predicted by the
            fitted functional linear model.
        """
        Ypred = self.predict(X)
        if plot:
            plt.scatter(Y, Ypred)
            plt.plot([0.9 * np.min(Y), 1.1 * np.max(Y)],
                     [0.9 * np.min(Y), 1.1 * np.max(Y)],
                     '--', color='black')
            plt.title("Ypred against Y")
            plt.show()
        return np.mean((Y - Ypred) ** 2)
def test_regression_regularization(self):

    x_basis = Monomial(n_basis=7)
    x_fd = FDataBasis(x_basis, np.identity(7))

    beta_basis = Fourier(n_basis=5)
    beta_fd = FDataBasis(beta_basis, [1.0403, 0, 0, 0, 0])
    y = [1.0000684777229512,
         0.1623672257830915,
         0.08521053851548224,
         0.08514200869281137,
         0.09529138749665378,
         0.10549625973303875,
         0.11384314859153018]

    y_pred_compare = [0.890341, 0.370162, 0.196773, 0.110079,
                      0.058063, 0.023385, -0.001384]

    scalar = LinearRegression(
        coef_basis=[beta_basis],
        regularization=TikhonovRegularization(
            LinearDifferentialOperator(2)))
    scalar.fit(x_fd, y)
    np.testing.assert_allclose(scalar.coef_[0].coefficients,
                               beta_fd.coefficients, atol=1e-3)
    np.testing.assert_allclose(scalar.intercept_, -0.15, atol=1e-4)

    y_pred = scalar.predict(x_fd)
    np.testing.assert_allclose(y_pred, y_pred_compare, atol=1e-4)

    x_basis = Monomial(n_basis=3)
    x_fd = FDataBasis(x_basis, [[1, 0, 0],
                                [0, 1, 0],
                                [0, 0, 1],
                                [2, 0, 1]])

    beta_fd = FDataBasis(x_basis, [3, 2, 1])
    # y_i = 1 + <beta, x_i> with beta(t) = 3 + 2t + t^2; for example
    # <beta, 1> = 3 + 1 + 1/3 = 13/3.
    y = [1 + 13 / 3, 1 + 29 / 12, 1 + 17 / 10, 1 + 311 / 30]

    # Non regularized
    scalar = LinearRegression()
    scalar.fit(x_fd, y)
    np.testing.assert_allclose(scalar.coef_[0].coefficients,
                               beta_fd.coefficients)
    np.testing.assert_allclose(scalar.intercept_, 1)

    y_pred = scalar.predict(x_fd)
    np.testing.assert_allclose(y_pred, y)

    # Regularized
    beta_fd_reg = FDataBasis(x_basis, [2.812, 3.043, 0])
    y_reg = [5.333, 3.419, 2.697, 11.366]

    scalar_reg = LinearRegression(
        regularization=TikhonovRegularization(
            LinearDifferentialOperator(2)))
    scalar_reg.fit(x_fd, y)
    np.testing.assert_allclose(scalar_reg.coef_[0].coefficients,
                               beta_fd_reg.coefficients, atol=0.001)
    np.testing.assert_allclose(scalar_reg.intercept_, 0.998, atol=0.001)

    y_pred = scalar_reg.predict(x_fd)
    np.testing.assert_allclose(y_pred, y_reg, atol=0.001)
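# Sketch, not part of the original tests, of how the fitted functional
# coefficient from a fitted skfda LinearRegression (such as the ones
# above) could be inspected; assumes FDataBasis.plot is available.
def plot_functional_coefficient(model):
    beta_hat = model.coef_[0]  # FDataBasis of the functional covariate
    beta_hat.plot()
    plt.title("Estimated functional coefficient")
    plt.show()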