Exemplo n.º 1
0
 def _compute(self):
     ''' Run the complete PCA pipeline on ``self.df``.

     The dataframe is centered in place (and divided by its standard
     deviation when ``self.reduced is True``), decomposed with
     ``util.svd`` and the derived quantities are then computed by the
     dedicated ``_compute_*`` helpers, in a fixed order.
     '''
     # Subtract the column means; the augmented assignment is deliberate so
     # the dataframe object is updated the same way as before.
     self.df -= self.df.mean()
     # Scale by the standard deviation only when explicitly requested
     if self.reduced is True:
         self.df /= self.df.std()
     # Singular value decomposition limited to the requested components
     decomposition = util.svd(self.df, k=self.components)
     self.U, self.W, self.V = decomposition
     # The eigenvectors are exposed as a matrix view of V
     self.eigenvectors = np.asmatrix(self.V)
     # The eigenvalues are the squared singular values, largest first
     self.eigenvalues = sorted(self.W ** 2, reverse=True)
     # Derived quantities: row projections, variable correlations,
     # explained variance, row inertia and finally total inertia
     derived_steps = (
         self._compute_row_projections,
         self._compute_variable_correlations,
         self._compute_explained_variance,
         self._compute_row_inertia,
         self._compute_total_inertia,
     )
     for run_step in derived_steps:
         run_step()
Exemplo n.º 2
0
 def _compute(self):
     ''' Perform all the steps required for a CA.

     Reads ``self.df`` and ``self.components`` and stores the results on
     the instance: ``stochastic_matrix``, ``row_sums``, ``row_weights``,
     ``col_sums``, ``col_weights``, ``expected_values``, ``U``, ``W``,
     ``V``, ``row_projections``, ``column_projections``, ``eigenvalues``
     and ``total_inertia``. It finishes by calling
     ``self._compute_explained_variance()``.

     All matrix products are written with ``np.dot`` so that they do not
     depend on whether the operands are ``np.matrix`` or plain arrays.
     '''
     data = np.matrix(self.df, float)
     grand_total = np.sum(data)
     # Stochastic matrix: the table rescaled so that its entries sum to one
     self.stochastic_matrix = data / grand_total
     # Row sums (column vector)
     self.row_sums = np.sum(self.stochastic_matrix, 1)
     # Row weights: reciprocals of the row sums, as a flat list
     self.row_weights = (1 / self.row_sums).reshape(1, -1).tolist()[0]
     # Column sums (row vector)
     self.col_sums = np.sum(self.stochastic_matrix, 0)
     # Column weights: reciprocals of the column sums, as a flat list
     self.col_weights = (1 / self.col_sums).tolist()[0]
     # "Expected values": the stochastic matrix minus the outer product of
     # its row and column sums, scaled on both sides by the square roots
     # of the weights. This is an explicit chained matrix product; np.prod
     # on a tuple of differently shaped matrices only produced it through
     # ragged object-array coercion, which recent NumPy rejects.
     residuals = self.stochastic_matrix - self.row_sums * self.col_sums
     row_scaling = np.diag(np.sqrt(self.row_weights))
     col_scaling = np.diag(np.sqrt(self.col_weights))
     self.expected_values = np.asmatrix(
         np.dot(np.dot(row_scaling, residuals), col_scaling)
     )
     # Singular Value Decomposition
     # NOTE(review): the decomposition runs on the raw self.df and not on
     # self.expected_values computed above -- this looks unintended for a
     # CA; confirm against util.svd and the callers before changing it.
     self.U, self.W, self.V = util.svd(self.df, k=self.components)
     # Singular values on the diagonal of a square matrix
     d = np.diag(self.W.tolist())
     columns = ['Component {}'.format(i) for i in range(self.p)]
     # Row projections
     sqrt_row_sums = np.diag(
         np.sqrt(self.row_sums.reshape(1, -1).tolist()[0])
     )
     row_basis = np.dot(sqrt_row_sums, self.U)
     projections = np.dot(np.dot(np.diag(self.row_weights), row_basis), d)
     self.row_projections = pd.DataFrame(
         projections,
         index=self.df.index,
         columns=columns
     )
     # Column projections
     sqrt_col_sums = np.diag(np.sqrt(self.col_sums.tolist()[0]))
     col_basis = np.dot(sqrt_col_sums, np.transpose(self.V))
     projections = np.dot(np.dot(np.diag(self.col_weights), col_basis), d.T)
     self.column_projections = pd.DataFrame(
         projections,
         index=self.df.columns,
         columns=columns
     )
     # Eigenvalues are the squared singular values
     squared_singular_values = self.W ** 2
     # Total inertia is the sum of the eigenvalues
     self.total_inertia = sum(squared_singular_values)
     # Store the eigenvalues sorted from high to low
     self.eigenvalues = sorted(squared_singular_values, reverse=True)
     self._compute_explained_variance()