def calculate_bartlett_sphericity(x):
    """
    Run Bartlett's test of sphericity on the correlation matrix of ``x``.

    The test checks whether the correlation matrix is equal to the
    identity matrix.

    H0: The matrix of population correlations is equal to I.
    H1: The matrix of population correlations is not equal to I.

    The formula for Bartlett's Sphericity test is:

    .. math:: -1 * (n - 1 - ((2p + 5) / 6)) * ln(det(R))

    Where det(R) is the determinant of the correlation matrix R,
    n is the number of observations (rows of ``x``), and p is the
    number of variables (columns of ``x``).

    Parameters
    ----------
    x : array-like
        The array from which to calculate sphericity. It must expose
        a two-element ``shape`` attribute ``(n, p)``.

    Returns
    -------
    statistic : float
        The chi-square value.
    p_value : float
        The associated p-value for the test, i.e. the upper-tail
        probability of ``statistic`` under a chi-square distribution
        with ``degrees_of_freedom`` degrees of freedom.
    degrees_of_freedom : float
        The degrees of freedom of the test, ``p * (p - 1) / 2``.
    """
    n, p = x.shape
    x_corr = corr(x)

    corr_det = np.linalg.det(x_corr)
    statistic = -np.log(corr_det) * (n - 1 - (2 * p + 5) / 6)
    degrees_of_freedom = p * (p - 1) / 2

    # The p-value is the survival function (1 - CDF) evaluated at the
    # statistic; the density (``pdf``) is not a probability of observing
    # a value at least this extreme.
    p_value = stats.chi2.sf(statistic, degrees_of_freedom)
    return statistic, p_value, degrees_of_freedom
def calculate_kmo(x):
    """
    Compute the Kaiser-Meyer-Olkin (KMO) criterion per item and overall.

    The statistic represents the degree to which each observed variable
    is predicted, without error, by the other variables in the dataset.
    In general, a KMO < 0.6 is considered inadequate.

    Parameters
    ----------
    x : array-like
        The array from which to calculate KMOs.

    Returns
    -------
    kmo_per_item : numpy array
        The KMO score per item.
    kmo_total : float
        The KMO score overall.
    """
    # partial correlations first, then the ordinary pair-wise ones
    partials = partial_correlations(x)
    pairwise = corr(x)

    # the diagonals do not take part in the measure: zero them in place
    np.fill_diagonal(pairwise, 0)
    np.fill_diagonal(partials, 0)

    # everything below works on squared off-diagonal entries
    partials_sq = partials**2
    pairwise_sq = pairwise**2

    # per-item score: column sums of squared correlations relative to
    # squared correlations plus squared partial correlations
    partials_by_item = np.sum(partials_sq, axis=0)
    pairwise_by_item = np.sum(pairwise_sq, axis=0)
    kmo_per_item = pairwise_by_item / (pairwise_by_item + partials_by_item)

    # overall score: the same ratio computed over the full matrices
    pairwise_all = np.sum(pairwise_sq)
    partials_all = np.sum(partials_sq)
    kmo_total = pairwise_all / (pairwise_all + partials_all)

    return kmo_per_item, kmo_total
The chi-square value. p_value : float The associated p-value for the test. """ n, p = x.shape x_corr = corr(x) corr_det = np.linalg.det(x_corr) statistic = -np.log(corr_det) * (n - 1 - (2 * p + 5) / 6) degrees_of_freedom = p * (p - 1) / 2 p_value = stats.chi2.pdf(statistic, degrees_of_freedom) return statistic, p_value, degrees_of_freedom chi_square1, p1, dof1 = calculate_bartlett_sphericity(data1) observed = corr(data1) expected = np.dot(factor_loadings,factor_loadings.T) chi_square2, p2 = stats.chisquare(observed,expected) # Perform CFA on K factor model model_dict = {"F1": ['AbsPM25', 'NO', 'Noise_n', 'PCB118', 'PCB180', 'PM10Cu', 'PM25CU'], "F2": ['PM10Ni', 'PM10V', 'PM25Ni', 'PM25V'], "F3": ['BDE138', 'BDE17', 'BDE209', 'BDE66', 'BDE99', 'Green'], "F4": ['MEOHP', 'MnBP', 'X5cxMEPP'], "F5": ['CHCl3', 'DDE', 'THM'], "F6": ['BPA', 'Cotinine', 'MBzP', 'PFOA', 'Sb']} model_spec = ModelSpecificationParser.parse_model_specification_from_dict(data1, model_dict) cfa = ConfirmatoryFactorAnalyzer(model_spec, disp=False) cfa.fit(data1.values) # Problem 1d
def fit(self, X, y=None):
    """
    Fit the factor analysis model using either
    minres, ml, or principal solutions. By default, use SMC
    as starting guesses.

    On return the estimator carries ``corr_``, ``loadings_``, ``phi_``,
    ``structure_`` and ``rotation_matrix_``; ``std_`` and ``mean_`` are
    also set when the input is raw data rather than a correlation matrix.

    Parameters
    ----------
    X : array-like
        The data to analyze.
    y : ignored

    Returns
    -------
    self
        The fitted estimator.

    Examples
    --------
    >>> import pandas as pd
    >>> from factor_analyzer import FactorAnalyzer
    >>> df_features = pd.read_csv('tests/data/test02.csv')
    >>> fa = FactorAnalyzer(rotation=None)
    >>> fa.fit(df_features)
    FactorAnalyzer(bounds=(0.005, 1), impute='median', is_corr_matrix=False,
            method='minres', n_factors=3, rotation=None, rotation_kwargs={},
            use_smc=True)
    >>> fa.loadings_
    array([[-0.12991218,  0.16398154,  0.73823498],
           [ 0.03899558,  0.04658425,  0.01150343],
           [ 0.34874135,  0.61452341, -0.07255667],
           [ 0.45318006,  0.71926681, -0.07546472],
           [ 0.36688794,  0.44377343, -0.01737067],
           [ 0.74141382, -0.15008235,  0.29977512],
           [ 0.741675  , -0.16123009, -0.20744495],
           [ 0.82910167, -0.20519428,  0.04930817],
           [ 0.76041819, -0.23768727, -0.1206858 ],
           [ 0.81533404, -0.12494695,  0.17639683]])
    """
    # always work on a private copy; a data frame is reduced
    # to its underlying array first
    X = X.copy().values if isinstance(X, pd.DataFrame) else X.copy()

    # validate the array (NaNs are tolerated at this stage)
    X = check_array(X, force_all_finite='allow-nan', estimator=self, copy=True)

    # missing values are imputed only for raw data,
    # never for a user-supplied correlation matrix
    if np.isnan(X).any() and not self.is_corr_matrix:
        X = impute_values(X, how=self.impute)

    # obtain the correlation matrix; for raw data also
    # remember the per-column standard deviation and mean
    if not self.is_corr_matrix:
        correlations = corr(X)
        self.std_ = np.std(X, axis=0)
        self.mean_ = np.mean(X, axis=0)
    else:
        correlations = X

    # keep an untouched copy of the correlation matrix
    self.corr_ = correlations.copy()

    # estimate the unrotated loadings
    loadings = (
        self._fit_principal(X)
        if self.method == 'principal'
        else self._fit_factor_analysis(correlations)
    )

    # these stay None unless a rotation fills them in below
    phi = structure = rotation_mtx = None

    # optionally rotate the loadings matrix
    if self.rotation is not None:
        if loadings.shape[1] > 1:
            if 'method' in self.rotation_kwargs:
                warnings.warn('You cannot pass a rotation method to '
                              '`rotation_kwargs`. This will be ignored.')
                self.rotation_kwargs.pop('method')

            rotator = Rotator(method=self.rotation, **self.rotation_kwargs)
            loadings = rotator.fit_transform(loadings)
            rotation_mtx = rotator.rotation_
            phi = rotator.phi_

            # every rotation except promax stores the
            # inverse-transposed rotation matrix
            if self.rotation != 'promax':
                rotation_mtx = np.linalg.inv(rotation_mtx).T
        else:
            warnings.warn('No rotation will be performed when '
                          'the number of factors equals 1.')

    if self.n_factors > 1:
        # flip factor signs so that they follow the column sums;
        # this is to ensure that signs align with R
        signs = np.sign(loadings.sum(0))
        signs[signs == 0] = 1
        sign_mtx = np.diag(signs)
        loadings = np.dot(loadings, sign_mtx)

        if phi is not None:
            # apply the same sign flips to phi, then build the
            # structure matrix for the oblique rotations
            phi = np.dot(np.dot(sign_mtx, phi), sign_mtx)
            if self.rotation in OBLIQUE_ROTATIONS:
                structure = np.dot(loadings, phi)

    # reorder the factors by decreasing variance
    variance = self._get_factor_variance(loadings)[0]
    descending = np.argsort(variance)[::-1]
    loadings = loadings[:, descending].copy()

    # the structure matrix, when present, follows the same order
    if structure is not None:
        structure = structure[:, descending].copy()

    self.phi_ = phi
    self.structure_ = structure
    self.loadings_ = loadings
    self.rotation_matrix_ = rotation_mtx
    return self