示例#1
0
def calculate_bartlett_sphericity(x):
    """
    Test the hypothesis that the correlation matrix
    is equal to the identity matrix.

    H0: The matrix of population correlations is equal to I.
    H1: The matrix of population correlations is not equal to I.

    The formula for Bartlett's Sphericity test is:

    .. math:: -1 * (n - 1 - ((2p + 5) / 6)) * ln(det(R))

    Where det(R) is the determinant of the correlation matrix R,
    n is the number of observations, and p is the number of variables.

    Parameters
    ----------
    x : array-like
        The array from which to calculate sphericity. It must expose
        a two-dimensional ``shape`` of (observations, variables).

    Returns
    -------
    statistic : float
        The chi-square value.
    p_value : float
        The associated p-value for the test.
    degrees_of_freedom : float
        The degrees of freedom of the chi-square distribution,
        ``p * (p - 1) / 2``.
    """
    n, p = x.shape
    x_corr = corr(x)

    corr_det = np.linalg.det(x_corr)
    statistic = -np.log(corr_det) * (n - 1 - (2 * p + 5) / 6)
    degrees_of_freedom = p * (p - 1) / 2

    # The p-value is the upper-tail probability of the chi-square
    # distribution (survival function), not the density at the statistic.
    p_value = stats.chi2.sf(statistic, degrees_of_freedom)
    return statistic, p_value, degrees_of_freedom
示例#2
0
def calculate_kmo(x):
    """
    Compute the Kaiser-Meyer-Olkin (KMO) criterion, both
    per item and for the data set as a whole. The statistic
    represents the degree to which each observed variable is
    predicted, without error, by the other variables in the
    dataset. In general, a KMO < 0.6 is considered inadequate.

    Parameters
    ----------
    x : array-like
        The array from which to calculate KMOs.

    Returns
    -------
    kmo_per_variable : numpy array
        The KMO score per item.
    kmo_total : float
        The KMO score overall.
    """

    # partial correlations first, then the pair-wise correlations
    partials = partial_correlations(x)
    pairwise = corr(x)

    # only off-diagonal entries contribute, so zero out
    # both diagonals before squaring
    for matrix in (pairwise, partials):
        np.fill_diagonal(matrix, 0)

    squared_partials = partials ** 2
    squared_pairwise = pairwise ** 2

    # per-item KMO: column sums of squared correlations relative to
    # the column sums of squared correlations plus partial correlations
    pairwise_by_item = np.sum(squared_pairwise, axis=0)
    partials_by_item = np.sum(squared_partials, axis=0)
    kmo_per_item = pairwise_by_item / (pairwise_by_item + partials_by_item)

    # overall KMO: the same ratio taken over every matrix element
    pairwise_total = np.sum(squared_pairwise)
    partials_total = np.sum(squared_partials)
    kmo_total = pairwise_total / (pairwise_total + partials_total)

    return kmo_per_item, kmo_total
示例#3
0
        The chi-square value.
    p_value : float
        The associated p-value for the test.
    """
    n, p = x.shape
    x_corr = corr(x)

    corr_det = np.linalg.det(x_corr)
    statistic = -np.log(corr_det) * (n - 1 - (2 * p + 5) / 6)
    degrees_of_freedom = p * (p - 1) / 2
    p_value = stats.chi2.pdf(statistic, degrees_of_freedom)
    return statistic, p_value, degrees_of_freedom

# Bartlett's sphericity test on data1; the helper used here returns the
# chi-square statistic, the p-value and the degrees of freedom.
chi_square1, p1, dof1 = calculate_bartlett_sphericity(data1)

# Compare the observed correlation matrix of data1 with the matrix obtained
# by multiplying the factor loadings by their own transpose.
# NOTE(review): factor_loadings is defined outside this excerpt -- presumably
# the loadings of a previously fitted factor model; confirm.
# NOTE(review): stats looks like scipy.stats; if so, chisquare works along
# axis 0 by default, so with 2-D inputs chi_square2 and p2 would be
# per-column arrays rather than scalars -- confirm this is intended.
observed = corr(data1)
expected = np.dot(factor_loadings,factor_loadings.T)
chi_square2, p2 = stats.chisquare(observed,expected)

# Perform CFA on K factor model
# model_dict maps each factor name (F1-F6) to the list of column names
# assigned to that factor.
model_dict = {"F1": ['AbsPM25', 'NO', 'Noise_n', 'PCB118', 'PCB180', 'PM10Cu', 'PM25CU'],
               "F2": ['PM10Ni', 'PM10V', 'PM25Ni', 'PM25V'],
               "F3": ['BDE138', 'BDE17', 'BDE209', 'BDE66', 'BDE99', 'Green'],
               "F4": ['MEOHP', 'MnBP', 'X5cxMEPP'],
               "F5": ['CHCl3', 'DDE', 'THM'],
               "F6": ['BPA', 'Cotinine', 'MBzP', 'PFOA', 'Sb']}
# Build the model specification from data1 and the factor-to-column mapping,
# then fit the confirmatory model on the underlying values of data1.
# NOTE(review): data1 exposes .values, so it is presumably a pandas
# DataFrame; disp=False presumably silences optimizer output -- confirm.
model_spec = ModelSpecificationParser.parse_model_specification_from_dict(data1, model_dict)
cfa = ConfirmatoryFactorAnalyzer(model_spec, disp=False)
cfa.fit(data1.values)

# Problem 1d
示例#4
0
    def fit(self, X, y=None):
        """
        Fit the factor analysis model using either
        minres, ml, or principal solutions. By default, use SMC
        as starting guesses.

        After fitting, the following attributes are set on the
        instance: ``corr_``, ``loadings_``, ``phi_``, ``structure_``
        and ``rotation_matrix_``; ``mean_`` and ``std_`` are also set
        when the input is not already a correlation matrix.

        Parameters
        ----------
        X : array-like
            The data to analyze.
        y : ignored

        Returns
        -------
        self
            The fitted instance, so that calls can be chained.

        Examples
        --------
        >>> import pandas as pd
        >>> from factor_analyzer import FactorAnalyzer
        >>> df_features = pd.read_csv('tests/data/test02.csv')
        >>> fa = FactorAnalyzer(rotation=None)
        >>> fa.fit(df_features)
        FactorAnalyzer(bounds=(0.005, 1), impute='median', is_corr_matrix=False,
                method='minres', n_factors=3, rotation=None, rotation_kwargs={},
                use_smc=True)
        >>> fa.loadings_
        array([[-0.12991218,  0.16398154,  0.73823498],
               [ 0.03899558,  0.04658425,  0.01150343],
               [ 0.34874135,  0.61452341, -0.07255667],
               [ 0.45318006,  0.71926681, -0.07546472],
               [ 0.36688794,  0.44377343, -0.01737067],
               [ 0.74141382, -0.15008235,  0.29977512],
               [ 0.741675  , -0.16123009, -0.20744495],
               [ 0.82910167, -0.20519428,  0.04930817],
               [ 0.76041819, -0.23768727, -0.1206858 ],
               [ 0.81533404, -0.12494695,  0.17639683]])
        """

        # check if the data is a data frame,
        # so we can convert it to an array
        if isinstance(X, pd.DataFrame):
            X = X.copy().values
        else:
            X = X.copy()

        # now check the array, and make sure it
        # meets all of our expected criteria
        X = check_array(X,
                        force_all_finite='allow-nan',
                        estimator=self,
                        copy=True)

        # check to see if there are any null values, and if
        # so impute using the desired imputation approach
        if np.isnan(X).any() and not self.is_corr_matrix:
            X = impute_values(X, how=self.impute)

        # get the correlation matrix
        if self.is_corr_matrix:
            corr_mtx = X
        else:
            corr_mtx = corr(X)
            self.std_ = np.std(X, axis=0)
            self.mean_ = np.mean(X, axis=0)

        # save the original correlation matrix
        self.corr_ = corr_mtx.copy()

        # fit factor analysis model
        if self.method == 'principal':
            loadings = self._fit_principal(X)
        else:
            loadings = self._fit_factor_analysis(corr_mtx)

        # only used if we do an oblique rotations;
        # default rotation matrix to None
        phi = None
        structure = None
        rotation_mtx = None

        # whether to rotate the loadings matrix
        if self.rotation is not None:
            if loadings.shape[1] <= 1:
                warnings.warn('No rotation will be performed when '
                              'the number of factors equals 1.')
            else:
                # work on a copy so that fitting never modifies the
                # `rotation_kwargs` dictionary supplied by the caller
                rotation_kwargs = dict(self.rotation_kwargs)
                if 'method' in rotation_kwargs:
                    warnings.warn('You cannot pass a rotation method to '
                                  '`rotation_kwargs`. This will be ignored.')
                    del rotation_kwargs['method']
                rotator = Rotator(method=self.rotation, **rotation_kwargs)
                loadings = rotator.fit_transform(loadings)
                rotation_mtx = rotator.rotation_
                phi = rotator.phi_
                # update the rotation matrix for everything, except promax
                if self.rotation != 'promax':
                    rotation_mtx = np.linalg.inv(rotation_mtx).T

        if self.n_factors > 1:
            # update loading signs to match column sums
            # this is to ensure that signs align with R
            signs = np.sign(loadings.sum(0))
            signs[(signs == 0)] = 1
            loadings = np.dot(loadings, np.diag(signs))

            if phi is not None:
                # update phi, if it exists -- that is, if the rotation is oblique
                # create the structure matrix for any oblique rotation
                phi = np.dot(np.dot(np.diag(signs), phi), np.diag(signs))
                structure = np.dot(
                    loadings,
                    phi) if self.rotation in OBLIQUE_ROTATIONS else None

        # resort the factors according to their variance
        variance = self._get_factor_variance(loadings)[0]
        new_order = list(reversed(np.argsort(variance)))
        loadings = loadings[:, new_order].copy()

        if structure is not None:
            structure = structure[:, new_order].copy()

        if phi is not None:
            # phi is indexed by factor on both axes, so apply the same
            # permutation to its rows and columns; otherwise it would no
            # longer line up with the reordered loadings and structure
            phi = phi[np.ix_(new_order, new_order)]

        self.phi_ = phi
        self.structure_ = structure

        self.loadings_ = loadings
        self.rotation_matrix_ = rotation_mtx
        return self