def _compute(self):
    ''' Run the full PCA pipeline on `self.df`.

    The dataframe is centered in place (and scaled by its standard
    deviation when `self.reduced is True`), decomposed with `util.svd`,
    and the derived quantities are stored on the instance: `U`, `W`, `V`,
    `eigenvectors` and `eigenvalues`. The remaining statistics are
    delegated to the `_compute_*` helpers, called in a fixed order.
    '''
    # Centering mutates self.df in place: subtract the per-column mean
    self.df -= self.df.mean()

    # Optional reduction: only an explicit True triggers the scaling
    if self.reduced is True:
        self.df /= self.df.std()

    # Singular value decomposition limited to the requested components
    decomposition = util.svd(self.df, k=self.components)
    self.U, self.W, self.V = decomposition

    # The eigenvectors are the right singular vectors, kept as a matrix
    self.eigenvectors = np.asmatrix(self.V)

    # Eigenvalues are the squared singular values, listed from high to low
    self.eigenvalues = sorted(self.W ** 2, reverse=True)

    # Derived statistics, in order: row projections, variable
    # correlations, explained variance, row inertia, total inertia
    remaining_steps = (
        self._compute_row_projections,
        self._compute_variable_correlations,
        self._compute_explained_variance,
        self._compute_row_inertia,
        self._compute_total_inertia,
    )
    for step in remaining_steps:
        step()
def _compute(self):
    ''' Perform all the steps required for a CA.

    Starting from the contingency table in `self.df`, this sets the
    following attributes on the instance:

    - `stochastic_matrix`: the table divided by its grand total
    - `row_sums` / `col_sums`: the margins of the stochastic matrix
    - `row_weights` / `col_weights`: the inverses of those margins
    - `expected_values`: the centered stochastic matrix scaled by the
      square roots of the row and column weights (i.e. the standardized
      residuals; the attribute name is kept for compatibility)
    - `U`, `W`, `V`: the output of `util.svd` on that residual matrix
    - `row_projections` / `column_projections`: pandas DataFrames indexed
      by the rows / columns of `self.df`
    - `eigenvalues` (sorted high to low) and `total_inertia`

    It finishes by calling `self._compute_explained_variance()`.
    '''
    data = np.matrix(self.df, float)
    grand_total = np.sum(data)

    # Stochastic matrix
    self.stochastic_matrix = data / grand_total

    # Row sums (column vector) and row weights (flat list)
    self.row_sums = np.sum(self.stochastic_matrix, 1)
    self.row_weights = (1 / self.row_sums).reshape(1, -1).tolist()[0]

    # Column sums (row vector) and column weights (flat list)
    self.col_sums = np.sum(self.stochastic_matrix, 0)
    self.col_weights = (1 / self.col_sums).tolist()[0]

    # Expected values. The three factors have different shapes and must be
    # chained as matrix products; np.prod would multiply elements instead
    # and rejects a ragged sequence. np.asmatrix guarantees that `*` is a
    # matrix product for the diagonal factors.
    centered = self.stochastic_matrix - self.row_sums * self.col_sums
    self.expected_values = (
        np.asmatrix(np.diag(np.sqrt(self.row_weights))) *
        centered *
        np.asmatrix(np.diag(np.sqrt(self.col_weights)))
    )

    # Singular Value Decomposition of the residual matrix computed above,
    # not of the raw table: the projections below rescale U and V by the
    # margins, which is only meaningful for this decomposition.
    # NOTE(review): a plain ndarray is passed on the assumption that
    # util.svd accepts any 2-D array-like -- confirm against util.svd.
    self.U, self.W, self.V = util.svd(
        np.asarray(self.expected_values),
        k=self.components
    )

    # Singular values
    d = np.asmatrix(np.diag(self.W.tolist()))
    # One label per retained singular value, so the labels always match
    # the width of the projection matrices built below
    columns = ['Component {}'.format(i) for i in range(d.shape[0])]

    # Row projections
    row_scale = np.asmatrix(
        np.diag(np.sqrt(self.row_sums.reshape(1, -1).tolist()[0]))
    )
    scaled_rows = row_scale * np.asmatrix(self.U)
    projections = np.asmatrix(np.diag(self.row_weights)) * scaled_rows * d
    self.row_projections = pd.DataFrame(
        projections,
        index=self.df.index,
        columns=columns
    )

    # Column projections
    col_scale = np.asmatrix(np.diag(np.sqrt(self.col_sums.tolist()[0])))
    scaled_cols = col_scale * np.transpose(np.asmatrix(self.V))
    projections = np.asmatrix(np.diag(self.col_weights)) * scaled_cols * d.T
    self.column_projections = pd.DataFrame(
        projections,
        index=self.df.columns,
        columns=columns
    )

    # Compute eigenvalues
    self.eigenvalues = self.W ** 2
    # Compute total inertia
    self.total_inertia = sum(self.eigenvalues)
    # Sort the eigenvalues from high to low
    self.eigenvalues = sorted(self.eigenvalues, reverse=True)

    self._compute_explained_variance()