Example #1
    def __init__(self, nbasis, basis_type='bspline'):
        self.nbasis = nbasis
        self.reg = LinearRegression()
        self.basis_type = basis_type
        self.coef = None
        if self.basis_type == 'fPCA':
            self.fpca_basis = FPCA(self.nbasis)
Example #2
    def test_regression_mixed(self):

        multivariate = np.array([[0, 0], [2, 7], [1, 7], [3, 9], [4, 16],
                                 [2, 14], [3, 5]])

        X = [
            multivariate,
            FDataBasis(Monomial(n_basis=3),
                       [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 1], [1, 0, 0],
                        [0, 1, 0], [0, 0, 1]])
        ]

        # y = 2 + sum([3, 1] * array) + int(3 * function)
        intercept = 2
        coefs_multivariate = np.array([3, 1])
        coefs_functions = FDataBasis(Monomial(n_basis=3), [[3, 0, 0]])
        y_integral = np.array([3, 3 / 2, 1, 4, 3, 3 / 2, 1])
        y_sum = multivariate @ coefs_multivariate
        y = 2 + y_sum + y_integral

        scalar = LinearRegression()
        scalar.fit(X, y)

        np.testing.assert_allclose(scalar.intercept_, intercept, atol=0.01)

        np.testing.assert_allclose(scalar.coef_[0],
                                   coefs_multivariate,
                                   atol=0.01)

        np.testing.assert_allclose(scalar.coef_[1].coefficients,
                                   coefs_functions.coefficients,
                                   atol=0.01)

        y_pred = scalar.predict(X)
        np.testing.assert_allclose(y_pred, y, atol=0.01)
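The hard-coded y_integral values come straight from the comment above: the integrals of the Monomial basis functions 1, t and t^2 over [0, 1] are 1, 1/2 and 1/3, so int(3 * function) is 3 times the dot product of each coefficient row with those integrals. A minimal NumPy sketch (no skfda required) reproducing them:

import numpy as np

# Integrals of the Monomial basis functions 1, t, t^2 over [0, 1].
basis_integrals = np.array([1, 1 / 2, 1 / 3])
coef_rows = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 1],
                      [1, 0, 0], [0, 1, 0], [0, 0, 1]])
# int(3 * function) for each sample: 3 * <coefficient row, basis integrals>.
y_integral = 3 * coef_rows @ basis_integrals
print(y_integral)  # [3.  1.5 1.  4.  3.  1.5 1. ]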
Example #3
    def test_regression_single_explanatory(self):

        x_basis = Monomial(n_basis=7)
        x_fd = FDataBasis(x_basis, np.identity(7))

        beta_basis = Fourier(n_basis=5)
        beta_fd = FDataBasis(beta_basis, [1, 1, 1, 1, 1])
        y = [
            0.9999999999999993, 0.162381381441085, 0.08527083481359901,
            0.08519946930844623, 0.09532291032042489, 0.10550022969639987,
            0.11382675064746171
        ]

        scalar = LinearRegression(coef_basis=[beta_basis])
        scalar.fit(x_fd, y)
        np.testing.assert_allclose(scalar.coef_[0].coefficients,
                                   beta_fd.coefficients)
        np.testing.assert_allclose(scalar.intercept_, 0.0, atol=1e-6)

        y_pred = scalar.predict(x_fd)
        np.testing.assert_allclose(y_pred, y)

        scalar = LinearRegression(coef_basis=[beta_basis], fit_intercept=False)
        scalar.fit(x_fd, y)
        np.testing.assert_allclose(scalar.coef_[0].coefficients,
                                   beta_fd.coefficients)
        np.testing.assert_equal(scalar.intercept_, 0.0)

        y_pred = scalar.predict(x_fd)
        np.testing.assert_allclose(y_pred, y)
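The hard-coded y values are the integrals of x_i(t) * beta(t) over [0, 1], where x_i(t) = t^(i-1) are the monomial covariates and beta is the sum of the first five Fourier basis functions. A quadrature sketch checking this, assuming skfda's orthonormal Fourier convention on [0, 1] (constant term, then sine/cosine pairs):

import numpy as np
from scipy.integrate import quad

# Orthonormal Fourier basis on [0, 1]: 1, sqrt(2)*sin(2*pi*t),
# sqrt(2)*cos(2*pi*t), sqrt(2)*sin(4*pi*t), sqrt(2)*cos(4*pi*t).
def beta(t):
    return 1 + np.sqrt(2) * (np.sin(2 * np.pi * t) + np.cos(2 * np.pi * t)
                             + np.sin(4 * np.pi * t) + np.cos(4 * np.pi * t))

y_check = [quad(lambda t: t ** i * beta(t), 0, 1)[0] for i in range(7)]
# y_check ≈ [1.0, 0.16238, 0.08527, 0.08520, 0.09532, 0.10550, 0.11383]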
Example #4
    def test_error_beta_not_basis(self):
        """ Test that all beta are Basis objects. """

        x_fd = FDataBasis(Monomial(n_basis=7), np.identity(7))
        y = [1 for _ in range(7)]
        beta = FDataBasis(Monomial(n_basis=7), np.identity(7))

        scalar = LinearRegression(coef_basis=[beta])
        with np.testing.assert_raises(TypeError):
            scalar.fit([x_fd], y)
Example #5
    def test_error_y_is_FData(self):
        """Tests that none of the explained variables is an FData object
        """
        x_fd = FDataBasis(Monomial(n_basis=7), np.identity(7))
        y = list(FDataBasis(Monomial(n_basis=7), np.identity(7)))

        scalar = LinearRegression(coef_basis=[Fourier(n_basis=5)])

        with np.testing.assert_raises(ValueError):
            scalar.fit([x_fd], y)
Example #6
    def test_error_weights_negative(self):
        """ Test that none of the weights are negative. """

        x_fd = FDataBasis(Monomial(n_basis=7), np.identity(7))
        y = [1 for _ in range(7)]
        weights = [-1 for _ in range(7)]
        beta = Monomial(n_basis=7)

        scalar = LinearRegression(coef_basis=[beta])
        with np.testing.assert_raises(ValueError):
            scalar.fit([x_fd], y, weights)
Example #7
    def test_error_X_not_FData(self):
        """Tests that at least one of the explanatory variables
        is an FData object. """

        x_fd = np.identity(7)
        y = np.zeros(7)

        scalar = LinearRegression(coef_basis=[Fourier(n_basis=5)])

        with np.testing.assert_warns(UserWarning):
            scalar.fit([x_fd], y)
Example #8
    def test_error_weights_length(self):
        """Test that a ValueError is raised when the number of weights
        differs from the number of samples."""

        x_fd = FDataBasis(Monomial(n_basis=7), np.identity(7))
        y = [1 for _ in range(7)]
        weights = [1 for _ in range(8)]
        beta = Monomial(n_basis=7)

        scalar = LinearRegression(coef_basis=[beta])
        with np.testing.assert_raises(ValueError):
            scalar.fit([x_fd], y, weights)
Example #9
    def test_multivariate(self):

        def ignore_scalar_warning():
            warnings.filterwarnings(
                "ignore", category=UserWarning,
                message="All the covariates are scalar.")

        X, y = make_regression(n_samples=20, n_features=10,
                               random_state=1, bias=3.5)

        X_train, X_test, y_train, _ = train_test_split(
            X, y, random_state=2)

        for regularization_parameter in [0, 1, 10, 100]:

            with self.subTest(
                    regularization_parameter=regularization_parameter):

                sklearn_l2 = Ridge(alpha=regularization_parameter)
                skfda_l2 = LinearRegression(
                    regularization=L2Regularization(
                        regularization_parameter=regularization_parameter),
                )

                sklearn_l2.fit(X_train, y_train)
                with warnings.catch_warnings():
                    ignore_scalar_warning()
                    skfda_l2.fit(X_train, y_train)

                sklearn_y_pred = sklearn_l2.predict(X_test)
                with warnings.catch_warnings():
                    ignore_scalar_warning()
                    skfda_y_pred = skfda_l2.predict(X_test)

                np.testing.assert_allclose(
                    sklearn_l2.coef_, skfda_l2.coef_[0])

                np.testing.assert_allclose(
                    sklearn_l2.intercept_, skfda_l2.intercept_)

                np.testing.assert_allclose(
                    sklearn_y_pred, skfda_y_pred)
Example #10
    def test_regression_mixed_regularization(self):

        multivariate = np.array([[0, 0], [2, 7], [1, 7], [3, 9], [4, 16],
                                 [2, 14], [3, 5]])

        X = [
            multivariate,
            FDataBasis(Monomial(n_basis=3),
                       [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 1], [1, 0, 0],
                        [0, 1, 0], [0, 0, 1]])
        ]

        # y = 2 + sum([3, 1] * array) + int(3 * function)
        intercept = 2
        coefs_multivariate = np.array([3, 1])
        y_integral = np.array([3, 3 / 2, 1, 4, 3, 3 / 2, 1])
        y_sum = multivariate @ coefs_multivariate
        y = 2 + y_sum + y_integral

        scalar = LinearRegression(regularization=[
            TikhonovRegularization(lambda x: x),
            TikhonovRegularization(LinearDifferentialOperator(2))
        ])
        scalar.fit(X, y)

        np.testing.assert_allclose(scalar.intercept_, intercept, atol=0.01)

        np.testing.assert_allclose(scalar.coef_[0], [2.536739, 1.072186],
                                   atol=0.01)

        np.testing.assert_allclose(scalar.coef_[1].coefficients,
                                   [[2.125676, 2.450782, 5.808745e-4]],
                                   atol=0.01)

        y_pred = scalar.predict(X)
        np.testing.assert_allclose(y_pred, [
            5.349035, 16.456464, 13.361185, 23.930295, 32.650965, 23.961766,
            16.29029
        ],
                                   atol=0.01)
Example #11
    def test_regression_multiple_explanatory(self):
        y = [1, 2, 3, 4, 5, 6, 7]

        X = FDataBasis(Monomial(n_basis=7), np.identity(7))

        beta1 = BSpline(domain_range=(0, 1), n_basis=5)

        scalar = LinearRegression(coef_basis=[beta1])

        scalar.fit(X, y)

        np.testing.assert_allclose(scalar.intercept_.round(4),
                                   np.array([32.65]),
                                   rtol=1e-3)

        np.testing.assert_allclose(
            scalar.coef_[0].coefficients.round(4),
            np.array([[-28.6443, 80.3996, -188.587, 236.5832, -481.3449]]),
            rtol=1e-3)

        y_pred = scalar.predict(X)
        np.testing.assert_allclose(y_pred, y, atol=0.01)
Example #12
    def test_error_X_beta_len_distinct(self):
        """ Test that the number of beta bases and explanatory variables
        are not different """

        x_fd = FDataBasis(Monomial(n_basis=7), np.identity(7))
        y = [1 for _ in range(7)]
        beta = Fourier(n_basis=5)

        scalar = LinearRegression(coef_basis=[beta])
        with np.testing.assert_raises(ValueError):
            scalar.fit([x_fd, x_fd], y)

        scalar = LinearRegression(coef_basis=[beta, beta])
        with np.testing.assert_raises(ValueError):
            scalar.fit([x_fd], y)
Example #13
    def test_error_y_X_samples_different(self):
        """ Test that the number of response samples and explanatory samples
        are not different """

        x_fd = FDataBasis(Monomial(n_basis=7), np.identity(7))
        y = [1 for _ in range(8)]
        beta = Fourier(n_basis=5)

        scalar = LinearRegression(coef_basis=[beta])
        with np.testing.assert_raises(ValueError):
            scalar.fit([x_fd], y)

        x_fd = FDataBasis(Monomial(n_basis=8), np.identity(8))
        y = [1 for _ in range(7)]
        beta = Fourier(n_basis=5)

        scalar = LinearRegression(coef_basis=[beta])
        with np.testing.assert_raises(ValueError):
            scalar.fit([x_fd], y)
Example #14
class BasisRegression(object):
    """Class implementing functional linear models with basis functions for vector-valued covariates.

    Parameters
    ----------
    nbasis: int
        Number of basis functions.

    basis_type: str, default='bspline'
        Type of basis used. Possible values are 'bspline', 'fourier' and 'fPCA'.

    Attributes
    ----------
    reg: object
        Instance of skfda.ml.regression.LinearRegression

    coef: array, default=None
        Regression coefficients

    fpca_basis: object
        If basis_type='fPCA', instance of skfda.preprocessing.dim_reduction.projection.FPCA().
    """
    def __init__(self, nbasis, basis_type='bspline'):
        self.nbasis = nbasis
        self.reg = LinearRegression()
        self.basis_type = basis_type
        self.coef = None
        if self.basis_type == 'fPCA':
            self.fpca_basis = FPCA(self.nbasis)

    def data_to_basis(self, X, fit_fPCA=True):
        """Project the data to basis functions.

        Parameters
        ----------
        X: array, shape (n,n_points,d)
            Array of paths. It is a 3-dimensional array, containing the coordinates in R^d of n piecewise linear paths,
            each composed of n_points.

        fit_fPCA: boolean, default=True
            If basis_type='fPCA' and fit_fPCA=True, the basis functions are fitted to be the functional principal
            components of X.

        Returns
        -------
        fd_basis: object
            Instance of skfda.representation.basis.FDataBasis, the basis representation of X, where the type of basis
            is determined by self.basis_type (if basis_type='fPCA', the array of principal component scores is
            returned instead).
        """
        grid_points = np.linspace(0, 1, X.shape[1])
        fd = FDataGrid(X, grid_points)
        basis_vec = []
        for i in range(X.shape[2]):
            if self.basis_type == 'bspline':
                basis_vec.append(BSpline(n_basis=self.nbasis))
            elif self.basis_type == 'fourier':
                basis_vec.append(Fourier(n_basis=self.nbasis))
            elif self.basis_type == 'fPCA':
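                # fPCA case: expand on a fixed B-spline basis first; the
                # principal component projection is applied below, after the loop.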
                basis_vec.append(BSpline(n_basis=7))

        basis = VectorValued(basis_vec)
        fd_basis = fd.to_basis(basis)
        if self.basis_type == 'fPCA':
            if fit_fPCA:
                self.fpca_basis = self.fpca_basis.fit(fd_basis)
            fd_basis = self.fpca_basis.transform(fd_basis)
        return fd_basis

    def fit(self, X, Y):
        """Fit the functional linear model to X and Y

        Parameters
        ----------
        X: array, shape (n,n_points,d)
            Array of training paths. It is a 3-dimensional array, containing the coordinates in R^d of n piecewise
            linear paths, each composed of n_points.

        Y: array, shape (n)
            Array of target values.

        Returns
        -------
        reg: object
            Instance of skfda.ml.regression.LinearRegression
        """
        fd_basis = self.data_to_basis(X)
        self.reg.fit(fd_basis, Y)
        self.coef = self.reg.coef_
        return self.reg

    def predict(self, X):
        """Predict the output of self.reg for X.

        Parameters
        ----------
        X: array, shape (n,n_points,d)
            Array of paths. It is a 3-dimensional array, containing the coordinates in R^d of n piecewise linear
            paths, each composed of n_points.

        Returns
        -------
        Ypred: array, shape (n)
            Array of predicted values.
        """
        fd_basis = self.data_to_basis(X, fit_fPCA=False)
        return self.reg.predict(fd_basis)

    def get_loss(self, X, Y, plot=False):
        """Computes the empirical squared loss obtained with the functional linear model

        Parameters
        ----------
        X: array, shape (n,n_points,d)
            Array of paths. It is a 3-dimensional array, containing the coordinates in R^d of n piecewise linear
            paths, each composed of n_points.

        Y: array, shape (n)
            Array of target values.

        plot: boolean, default=False
            If True, plots a scatter plot of the target values Y against the predicted values Ypred to assess the
            quality of the fit.

        Returns
        -------
        hatL: float
            The empirical mean squared error, that is, the mean of the squares of Y - Ypred, where Ypred are the
            values predicted by the fitted functional linear model.
        """
        Ypred = self.predict(X)
        if plot:
            plt.scatter(Y, Ypred)
            plt.plot([0.9 * np.min(Y), 1.1 * np.max(Y)],
                     [0.9 * np.min(Y), 1.1 * np.max(Y)],
                     '--',
                     color='black')
            plt.title("Ypred against Y")
            plt.show()
        return np.mean((Y - Ypred)**2)
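A hypothetical end-to-end usage sketch on synthetic data (array shapes follow the docstrings above; it assumes the skfda names used by the class are already imported):

import numpy as np

# n=50 piecewise linear paths with n_points=30 in R^2 (d=2).
rng = np.random.default_rng(0)
X = rng.standard_normal((50, 30, 2)).cumsum(axis=1)
# Toy scalar target depending on the endpoints of the paths.
Y = X[:, -1, 0] + 0.5 * X[:, -1, 1]

model = BasisRegression(nbasis=5, basis_type='bspline')
model.fit(X, Y)
Y_pred = model.predict(X)
mse = model.get_loss(X, Y)  # in-sample mean squared error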
Example #15
    def test_regression_regularization(self):

        x_basis = Monomial(n_basis=7)
        x_fd = FDataBasis(x_basis, np.identity(7))

        beta_basis = Fourier(n_basis=5)
        beta_fd = FDataBasis(beta_basis, [1.0403, 0, 0, 0, 0])
        y = [
            1.0000684777229512, 0.1623672257830915, 0.08521053851548224,
            0.08514200869281137, 0.09529138749665378, 0.10549625973303875,
            0.11384314859153018
        ]

        y_pred_compare = [
            0.890341, 0.370162, 0.196773, 0.110079, 0.058063, 0.023385,
            -0.001384
        ]

        scalar = LinearRegression(coef_basis=[beta_basis],
                                  regularization=TikhonovRegularization(
                                      LinearDifferentialOperator(2)))
        scalar.fit(x_fd, y)
        np.testing.assert_allclose(scalar.coef_[0].coefficients,
                                   beta_fd.coefficients,
                                   atol=1e-3)
        np.testing.assert_allclose(scalar.intercept_, -0.15, atol=1e-4)

        y_pred = scalar.predict(x_fd)
        np.testing.assert_allclose(y_pred, y_pred_compare, atol=1e-4)

        x_basis = Monomial(n_basis=3)
        x_fd = FDataBasis(x_basis,
                          [[1, 0, 0], [0, 1, 0], [0, 0, 1], [2, 0, 1]])

        beta_fd = FDataBasis(x_basis, [3, 2, 1])
        y = [1 + 13 / 3, 1 + 29 / 12, 1 + 17 / 10, 1 + 311 / 30]

        # Non regularized
        scalar = LinearRegression()
        scalar.fit(x_fd, y)
        np.testing.assert_allclose(scalar.coef_[0].coefficients,
                                   beta_fd.coefficients)
        np.testing.assert_allclose(scalar.intercept_, 1)

        y_pred = scalar.predict(x_fd)
        np.testing.assert_allclose(y_pred, y)

        # Regularized
        beta_fd_reg = FDataBasis(x_basis, [2.812, 3.043, 0])
        y_reg = [5.333, 3.419, 2.697, 11.366]

        scalar_reg = LinearRegression(regularization=TikhonovRegularization(
            LinearDifferentialOperator(2)))
        scalar_reg.fit(x_fd, y)
        np.testing.assert_allclose(scalar_reg.coef_[0].coefficients,
                                   beta_fd_reg.coefficients,
                                   atol=0.001)
        np.testing.assert_allclose(scalar_reg.intercept_, 0.998, atol=0.001)

        y_pred = scalar_reg.predict(x_fd)
        np.testing.assert_allclose(y_pred, y_reg, atol=0.001)
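For reference, the Tikhonov-regularized fits above minimize a penalized least-squares criterion of the standard form sum_i (y_i - c - <x_i, beta>)^2 + lambda * ||L beta||^2, with L the supplied operator (skfda's exact scaling may differ). With L = LinearDifferentialOperator(2), the curvature of beta is penalized, which is why the quadratic coefficient of the unregularized beta [3, 2, 1] is shrunk essentially to zero in beta_fd_reg ([2.812, 3.043, 0]).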