Example #1
    def __init__(self, grid: FDataGrid, smoothed=False):
        self.init_grid = grid.copy()
        self.sample_points = self.init_grid.sample_points[0]
        # data_matrix has shape (n_samples, n_points, dim_codomain)
        self._nSeries = self.init_grid.data_matrix.shape[0]
        self._nObs = self.init_grid.data_matrix.shape[1]
        self._nVar = self.init_grid.data_matrix.shape[2]
        # one univariate FDataGrid per output coordinate
        self.coordinates_grids = list(self.init_grid.coordinates)
        self.coordinate_names = self.init_grid.coordinate_names
        self._smoothed = smoothed
        if self._smoothed:
            # precompute first and second derivatives of each coordinate
            self.coordinates_grids_dx1 = [
                coord.derivative(order=1) for coord in self.coordinates_grids
            ]
            self.coordinates_grids_dx2 = [
                coord.derivative(order=2) for coord in self.coordinates_grids
            ]
        self._scaled = False
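
A minimal usage sketch for the constructor above. The owning class is not
shown in the snippet, so CurveSet below is a hypothetical stand-in for
whatever class defines this __init__; the FDataGrid construction uses the
same skfda API (sample_points keyword) that the snippet relies on.

import numpy as np
from skfda import FDataGrid

t = np.linspace(0, 1, 100)
# one sample of a bivariate curve: data_matrix shape (1, 100, 2)
data = np.stack([np.sin(2 * np.pi * t), np.cos(2 * np.pi * t)], axis=-1)
grid = FDataGrid(data_matrix=data[np.newaxis], sample_points=[t])

series = CurveSet(grid, smoothed=True)  # CurveSet is hypothetical
print(series._nSeries, series._nObs, series._nVar)  # -> 1 100 2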
Example #2
    def _fit_grid(self, X: FDataGrid, y=None):
        r"""Computes the n_components first principal components and saves them.

        The eigenvalues associated with these principal
        components are also saved. For more details about how it is implemented
        please view the referenced book, chapter 8.

        In summary, we are performing standard multivariate PCA over
        :math:`\frac{1}{\sqrt{N}} \mathbf{X} \mathbf{W}^{1/2}` where :math:`N`
        is the number of samples in the dataset, :math:`\mathbf{X}` is the data
        matrix and :math:`\mathbf{W}` is the weight matrix (this matrix
        defines the numerical integration). By default the weight matrix is
        obtained using the trapezoidal rule.

        Args:
            X (FDataGrid):
                the functional data object to be analysed, in discretized
                (grid) representation
            y (None, not used):
                present only to keep the conventional fit signature

        Returns:
            self (object)

        References:
            .. [RS05-8-4-1] Ramsay, J., & Silverman, B. W. (2005). Discretizing
                the functions. In *Functional Data Analysis* (p. 161).
                Springer.
        """

        # check that the number of components does not exceed the sample size
        if self.n_components > X.n_samples:
            raise AttributeError("The number of components cannot exceed "
                                 "the sample size.")

        # check that the number of components does not exceed the number of
        # discretization points of the functional data object
        if self.n_components > X.data_matrix.shape[1]:
            raise AttributeError("The number of components cannot exceed "
                                 "the number of discretization points of the "
                                 "functional data object.")

        # if centering is True, subtract the mean function from each sample
        X = self._center_if_necessary(X)

        # drop the trailing codomain axis: fd_data has shape
        # (n_samples, n_points)
        fd_data = X.data_matrix.reshape(X.data_matrix.shape[:-1])

        # get the number of samples and the number of discretization points
        n_samples, n_points_discretization = fd_data.shape

        # establish weights for each point of discretization
        if self.weights is None:
            # sample_points is a list with one array in the 1-D case.
            # For the trapezoidal rule with \Delta x_k = x_k - x_{k-1}, the
            # weight vector is [\Delta x_1 / 2, (\Delta x_1 + \Delta x_2) / 2,
            # ..., (\Delta x_{n-1} + \Delta x_n) / 2, \Delta x_n / 2]
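            # e.g. sample points [0, 1, 3] give padded differences
            # [0, 1, 2, 0] and weights [0.5, 1.5, 1.0]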
            differences = np.diff(X.sample_points[0])
            differences = np.concatenate(((0, ), differences, (0, )))
            self.weights = (differences[:-1] + differences[1:]) / 2
        elif callable(self.weights):
            self.weights = self.weights(X.sample_points[0])
            # if the callable returned an FDataGrid, reduce it to a 1-D array
            if isinstance(self.weights, FDataGrid):
                self.weights = np.squeeze(self.weights.data_matrix)

        weights_matrix = np.diag(self.weights)

        # a discrete "basis" whose data matrix is the identity; it is used to
        # build the penalty matrix and the (I + P) system below
        basis = FDataGrid(data_matrix=np.identity(n_points_discretization),
                          sample_points=X.sample_points)

        regularization_matrix = compute_penalty_matrix(
            basis_iterable=(basis, ),
            regularization_parameter=1,
            regularization=self.regularization)

        # right-multiply fd_data by (I + P)^(-1); when self.regularization is
        # None the penalty matrix P is zero and the data pass through unchanged
        fd_data = np.transpose(
            np.linalg.solve(
                np.transpose(basis.data_matrix[..., 0] +
                             regularization_matrix), np.transpose(fd_data)))

        # the matrix PCA is applied to (see docstring):
        # (1 / sqrt(N)) * X * W^(1/2)
        final_matrix = fd_data @ np.sqrt(weights_matrix) / np.sqrt(n_samples)

        pca = PCA(n_components=self.n_components)
        pca.fit(final_matrix)
        # map the loadings back through W^(-1/2) so the components are
        # orthonormal with respect to the weighted inner product
        self.components_ = X.copy(data_matrix=np.transpose(
            np.linalg.solve(np.sqrt(weights_matrix),
                            np.transpose(pca.components_))))
        self.explained_variance_ratio_ = pca.explained_variance_ratio_
        self.explained_variance_ = pca.explained_variance_

        return self
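
Why the W^(1/2) scaling works: ordinary PCA loadings are orthonormal in the
Euclidean sense, so mapping them back through W^(-1/2) makes them orthonormal
under the discretized L^2 inner product that W defines. A standalone
numpy/sklearn sketch of this idea on toy data (not the skfda API):

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
t = np.linspace(0, 1, 30)                                  # discretization points
X = np.sin(2 * np.pi * np.outer(rng.normal(size=20), t))   # 20 toy curves

# trapezoidal quadrature weights, built exactly as in _fit_grid above
d = np.concatenate(((0,), np.diff(t), (0,)))
w = (d[:-1] + d[1:]) / 2
W_sqrt = np.diag(np.sqrt(w))

# standard multivariate PCA over (1 / sqrt(N)) * X * W^(1/2)
pca = PCA(n_components=2)
pca.fit(X @ W_sqrt / np.sqrt(X.shape[0]))

# map the Euclidean-orthonormal loadings back through W^(-1/2)
components = np.linalg.solve(W_sqrt, pca.components_.T).T

# the components are orthonormal under the weighted inner product
# <f, g> = sum_k w_k f(t_k) g(t_k): their Gram matrix is ~ identity
print(np.round(components @ np.diag(w) @ components.T, 6))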