示例#1
0
def get_fa_loads(d_phens,
                 kmo_threshold=0.6,
                 bartlett_threshold=0.05,
                 n_shuffle=100,
                 test_factorability=False):
    """
    Fit a factor analysis to the phenotype table and return its loadings.

    :param d_phens: phenotype data; its ``.columns`` become the row labels
        of the result (presumably a pandas DataFrame - confirm with callers).
    :param kmo_threshold: smallest overall KMO value still accepted by the
        optional factorability check.
    :param bartlett_threshold: largest Bartlett p-value still accepted by
        the optional factorability check.
    :param n_shuffle: forwarded to ``pa`` as its second argument.
    :param test_factorability: when true, run the Bartlett and KMO checks
        before fitting.
    :return: DataFrame of factor loadings indexed by the phenotype columns,
        or ``None`` (after a warning) when the factorability check fails.
    """
    if test_factorability:
        # Only the second element of each result is needed here.
        bartlett_p = calculate_bartlett_sphericity(d_phens)[1]
        kmo_overall = calculate_kmo(d_phens)[1]

        not_factorable = (kmo_overall < kmo_threshold
                          or bartlett_p > bartlett_threshold)
        if not_factorable:
            warnings.warn('\nPhenotypic data does not contain factors')
            return None

    # The number of factors comes from the parallel-analysis helper ``pa``.
    analyzer = FactorAnalyzer(n_factors=pa(d_phens, n_shuffle))
    analyzer.fit(d_phens)

    return pd.DataFrame(data=analyzer.loadings_, index=d_phens.columns)
示例#2
0
def test_calculate_bartlett_sphericity():
    """Check Bartlett's statistic and p-value on the bundled test01 data."""
    frame = pd.read_csv('tests/data/test01.csv')
    statistic, p_value = calculate_bartlett_sphericity(frame.values)

    # Expected values as recorded by the original test.
    assert_almost_equal(statistic, 14185)
    assert_almost_equal(p_value, 0)
示例#3
0
def perform_fa(df):
    """Return a message saying whether Bartlett's test permits factor analysis.

    The p-value of ``calculate_bartlett_sphericity(df)`` is compared with
    0.05; the returned string embeds the p-value and the verdict.
    """
    _, p_val = calculate_bartlett_sphericity(df)

    not_significant = p_val > 0.05
    if not_significant:
        return f"P-value=({p_val}). Statistically insignifincant, factorial analaisis can not be performed"
    return f"P-value=({p_val}). Statistically significant, factorial analaisis can be performed"
示例#4
0
def stat_test(mat_features):
    """Print the determinant, Bartlett and KMO statistics for the features.

    The correlation matrix of ``mat_features`` is computed first; note that
    it is this correlation matrix (not the raw features) that is passed on
    to both ``calculate_bartlett_sphericity`` and ``calculate_kmo``.
    """
    correlations = mat_features.corr()
    m_corr = np.array(correlations)
    determinant = np.linalg.det(m_corr)
    print('determinant feature matrix is: ', determinant)

    bartlett_result = calculate_bartlett_sphericity(m_corr)
    chi2, p_val = bartlett_result
    print('Bartlett test')
    print('value of chi square: ', chi2, 'p-value: ', p_val)

    _, kmo_overall = calculate_kmo(m_corr)
    print('Kaiser-Meyer-Olkin test')
    print('value of kmo: ', kmo_overall)
示例#5
0
 def bartlett_sphericity(self):
     """Run Bartlett's test of sphericity on ``self.dataset`` and log it.

     Two messages are sent to ``self.logger`` at INFO level: the chi-square
     statistic together with the p-value, and a verdict obtained by
     comparing the p-value with ``self.p_alpha``.

     :return: always 0.
     """
     chi_square_value, p_value = calculate_bartlett_sphericity(self.dataset)

     # The p-value is printed with three decimals, so anything below 0.001
     # is reported as a bound. The label has to match the threshold that is
     # actually tested (the original text said "p<0.0001").
     if p_value < 0.001:
         p_text = "p<0.001"
     else:
         p_text = "p=%.3f" % p_value
     self.logger.info("chi^2 = %.3f, %s" % (chi_square_value, p_text))

     if p_value < self.p_alpha:
         self.logger.info("It is not an identity matrix.")
     else:
         self.logger.info("It is an identity matrix.")
     return 0
def out_Ade(df2):
    """Run the adequacy tests (KMO and Bartlett) on ``df2`` and tabulate them.

    Both results are printed, then returned as a three-row DataFrame:
    a title row, a header row and a row holding the overall KMO value and
    the Bartlett p-value.

    Original author's rule of thumb for KMO: above 0.9 very good; above 0.8
    good; 0.7 fair; 0.6 poor; 0.5 very poor; below 0.5 unacceptable.
    NOTE(review): the original note also said Bartlett's value lies in 0-1
    and "the closer to 1 the better"; that reads like a description of KMO
    rather than of Bartlett's p-value - confirm before relying on it.
    """
    # Kaiser-Meyer-Olkin test: keep only the overall (model) value.
    kmo_overall = factor_analyzer.calculate_kmo(df2)[1]
    print('kmo:{}'.format(kmo_overall))

    # Bartlett's test: keep only the p-value.
    bartlett_p = factor_analyzer.calculate_bartlett_sphericity(df2)[1]
    print('Bartlett_p:{}'.format(bartlett_p))

    table_rows = [
        ['充分性测试(Adequacy Test)', None],
        ['kmo', 'Bartlett_p'],
        [kmo_overall, bartlett_p],
    ]
    return pd.DataFrame(table_rows)
示例#7
0
def factor_analysis(df, name, all_factors=True):
    """Print factorability diagnostics for ``df`` and plot its correlations.

    Prints Bartlett's chi-square and p-value, the overall KMO value and the
    result of ``pairwiseCorr``. When ``all_factors == True`` the eigenvalues
    from ``get_factor_eigenvalues`` are printed as well. ``name`` is accepted
    but not used by this variant.
    """
    chi2, p_val = calculate_bartlett_sphericity(df)
    _, kmo_overall = calculate_kmo(df)
    correlations = pairwiseCorr(df)

    # Each entry: heading followed by the values printed on the next line.
    report = (
        ("\nChi_square_value and p_value:", (chi2, p_val)),
        ("\nKmo model:", (kmo_overall,)),
        ("\nPairwise correlations:", (correlations,)),
    )
    for heading, values in report:
        print(heading)
        print(*values)

    # Deliberately an equality test, exactly as the original wrote it.
    if all_factors == True:
        factor_eigenvalues = get_factor_eigenvalues(df)
        print("\nFA Eigenvalues:")
        print(factor_eigenvalues)

    plot_correlations(df)
示例#8
0
def efa_model_tests(item_data):
    """Render the 'Model Tests' section: Bartlett and KMO results side by side.

    The left column shows Bartlett's chi-square and p-value for
    ``item_data``; the right column shows the overall KMO statistic.
    """
    st.header('Model Tests')

    # ``st.beta_columns`` was the provisional name of ``st.columns`` and has
    # been removed from current Streamlit releases. Prefer the stable API
    # and fall back to the old name on installations that predate it.
    make_columns = getattr(st, 'columns', None) or st.beta_columns
    col1, col2 = make_columns(2)

    with col1:
        st.write('Bartlett Sphericity')
        chi2, pValue = calculate_bartlett_sphericity(item_data)
        st.write(f'chi-squared = {chi2}, p = {pValue}')
    with col2:
        st.write('KMO Test')
        kmoAll, kmoModel = calculate_kmo(item_data)
        st.write(f'KMO Statistic: {kmoModel}')
示例#9
0
def factor_analysis(df, name, not_test_set=True):
    """Print factorability diagnostics for ``df`` and plot its correlations.

    Prints Bartlett's chi-square and p-value, the overall KMO value and the
    two results returned by ``pairwiseCorr``. When ``not_test_set == True``
    the eigenvalues from ``get_factor_eigenvalues`` are printed as well.
    Finally ``plot_correlations(df, name)`` is called.
    """
    chi2, p_val = calculate_bartlett_sphericity(df)
    _, kmo_overall = calculate_kmo(df)
    significant, most_correlated = pairwiseCorr(df)

    # Each entry: heading followed by the values printed on the next line.
    report = (
        ("\nChi_square_value and p_value:", (chi2, p_val)),
        ("\nKmo model:", (kmo_overall,)),
        ("\nStatistically significant correlations:", (significant,)),
        ("\nMost correlated variables:", (most_correlated,)),
    )
    for heading, values in report:
        print(heading)
        print(*values)

    # Deliberately an equality test, exactly as the original wrote it.
    if not_test_set == True:
        factor_eigenvalues = get_factor_eigenvalues(df)
        print("\nFA Eigenvalues:")
        print(factor_eigenvalues)

    plot_correlations(df, name)
示例#10
0
def FA(observied_variables, name):
    """Run the adequacy tests and a factor analysis, returning the loadings.

    Prints Bartlett's chi-square and p-value, the overall KMO value, the
    eigenvalues, the loadings table and the factor-variance table.

    :param observied_variables: observed data; its ``.columns`` become the
        row labels of the loadings table (presumably a pandas DataFrame).
    :param name: selects the number of factors: 'phone' -> 2, 'QOL' -> 4.
    :return: DataFrame of factor loadings indexed by the input columns.
    :raises ValueError: if ``name`` is not one of the supported names.
    """
    # Fail fast on an unsupported name. Previously no analyzer was created
    # in that case and the function crashed later with UnboundLocalError.
    n_factors_by_name = {'phone': 2, 'QOL': 4}
    if name not in n_factors_by_name:
        raise ValueError(
            f"Unknown name {name!r}; expected one of "
            f"{sorted(n_factors_by_name)}")

    from factor_analyzer.factor_analyzer import (
        calculate_bartlett_sphericity,
        calculate_kmo,
    )

    chi_square_value, p_value = calculate_bartlett_sphericity(
        observied_variables)
    print("chi_square_value", chi_square_value, "p-value:", p_value)
    kmo_all, kmo_model = calculate_kmo(observied_variables)
    print("KMO value", kmo_model)

    # Create the factor analysis object and fit it. The transformed scores
    # were never used, so a plain fit is sufficient.
    fa = FactorAnalyzer(n_factors=n_factors_by_name[name])
    fa.fit(observied_variables)

    # Check eigenvalues. A scree plot can be drawn from these against
    # range(1, observied_variables.shape[1] + 1) if needed.
    eigen_values, _ = fa.get_eigenvalues()
    print(eigen_values)

    # Build the loadings table once and reuse it for printing and returning.
    loadings_frame = pd.DataFrame(fa.loadings_, observied_variables.columns)
    print(loadings_frame)

    # Variance of each factor. This used to sit after the return statement
    # and therefore never ran.
    print(
        pd.DataFrame(fa.get_factor_variance(),
                     ['SS Loadings', 'Proportion Var', 'Cumulative Var']))

    return loadings_frame
示例#11
0
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity, calculate_kmo
from factor_analyzer import FactorAnalyzer
import seaborn as sns

# Load the preprocessed data set from a pickle file.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
df = pd.read_pickle('process_data_after_remove_variabel_remain_96.pkl')

# Split the data set: X holds every column except 'qc_salzrckhalt',
# y holds that column.
X = df.drop('qc_salzrckhalt', axis=1)
y = df['qc_salzrckhalt']

# Adequacy test: evaluate the "factorability" of the data set,
# i.e. whether factors can be found in it at all.

# Bartlett's test
VarbList = df.columns
chi_square_value, p_value = calculate_bartlett_sphericity(X)
chi_square_value, p_value
# --> Author's recorded result: p-value = 0, i.e. the test was statistically
#     significant; the observed correlation matrix is not an identity matrix.

# Kaiser-Meyer-Olkin test
kmo_all, kmo_model = calculate_kmo(X)
kmo_model
# --> Author's recorded result: a KMO value of 0.653 indicates moderate
#     suitability for factor analysis. Source: Cureton, E. E. / D'Agostino,
#     R. B. 1983: Factor analysis: an applied approach. Hillsdale, NJ:
#     Lawrence Erlbaum Associates, p. 389 f.

# Choosing the number of factors
# Create the factor analysis object (no rotation, 30 factors) and fit it to X.
fa = FactorAnalyzer(rotation=None, n_factors=30)
fa.fit(X)
# Check eigenvalues
ev, v = fa.get_eigenvalues()
ev
示例#12
0
# Standardize the selected variables.
# NOTE(review): df_standard is not used by the calls below, which all work
# on df[vars_tot] - confirm which input is intended.
df_standard = preprocessing.scale(df[vars_tot])

#--------------------------------------------
# Factor Analysis
#--------------------------------------------
'''NOTE: from the previous PCA we know that the first three principle components
    explain 72% of the variance. Hence we base our analysis on three components.
    
    The standard method of the FA package is minimum residual. Also possible:
        MLE, PCA. 
    '''

# Pre-tests
## Bartlett's test of sphericity. H0: the correlation matrix is an identity
## matrix (the variables are unrelated).
bartlett_chi, bartlett_p = calculate_bartlett_sphericity(
    df[vars_tot])  # author's recorded result: p = 0.0

## Kaiser-Meyer-Olkin (KMO) test. Measures data suitability; lies between 0 and 1 and should be above 0.6
kmo_all, kmo_model = calculate_kmo(df[vars_tot])  # author's recorded result: kmo_model = 0.7297

#--------------------------------------------
# Factor Analysis
# Fit an unrotated four-factor model and read the eigenvalues.
fa = FactorAnalyzer(rotation=None, n_factors=4)
fa.fit(df[vars_tot])
ev, v = fa.get_eigenvalues()
'''NOTE: First four factors have an eigen value greater than 1. Use those.'''

# Perform a parallel analysis
list_ev_rand = []

# Fixed seed so the random part of the parallel analysis is reproducible.
np.random.seed(10)
示例#13
0
 def test(self):
     """Run Bartlett's sphericity test and the KMO test on ``self.data[self.col]``.

     The four results are stored on the instance (``chi_square_value``,
     ``p_value``, ``kmo_all``, ``kmo_model``) and returned in that order.
     """
     bartlett_result = calculate_bartlett_sphericity(self.data[self.col])
     self.chi_square_value, self.p_value = bartlett_result

     kmo_result = calculate_kmo(self.data[self.col])
     self.kmo_all, self.kmo_model = kmo_result

     return (self.chi_square_value, self.p_value,
             self.kmo_all, self.kmo_model)
# Normalize the data with MinMaxScaler and preview the first rows.

# In[35]:

card_df_norm = MinMaxScaler().fit_transform(card_df)
pd.DataFrame(card_df_norm).head()

# ## 3.1 Dimensionality Reduction - Factor Analysis

# Bartlett's test of sphericity checks whether the observed variables intercorrelate at all, by comparing the observed correlation matrix against the identity matrix. If the test is statistically insignificant, factor analysis should not be used.
#

# In[36]:

chi_square_value, p_value = calculate_bartlett_sphericity(card_df_norm)
print(chi_square_value, p_value)

# ##### Author's recorded result: the p-value of this Bartlett's test is 0. The test was statistically significant, indicating that the observed correlation matrix is not an identity matrix.
#

# The Kaiser-Meyer-Olkin (KMO) test measures the suitability of data for factor analysis. It determines the adequacy for each observed variable and for the complete model. KMO estimates the proportion of variance among all the observed variables. A lower proportion is more suitable for factor analysis. KMO values range between 0 and 1. A KMO value below 0.6 is considered inadequate.

# In[37]:

kmo_all, kmo_model = calculate_kmo(card_df_norm)
kmo_model

# ###### Author's recorded result: kmo_model is 0.64 here, so it is adequate.

# ### Choosing the Number of Factors
'''NOTE: All reject H0 --> Not normal distributed '''
'''Conclusion: Use principle factors method '''
#--------------------------------------------
# Pre-tests
#--------------------------------------------
''' We perform two pre-tests
    1) Bartlett's test of sphericity: tests whether the correlation matrix 
        equals an identiy matrix (H0), which means that the variables are 
        unrelated;
    2) Kaiser-Meyer-Olkin test: Is a statistic that indicates the proportion
        of variance that might be caused by underlying factors. Test statistic
        should be over .5'
    '''

# Bartlett's test of sphericity. H0: the correlation matrix is an identity
# matrix (the variables are unrelated).
bartlett_chi, bartlett_p = calculate_bartlett_sphericity(
    df_standard)  # author's recorded result: p = 0.0

# Kaiser-Meyer-Olkin (KMO) test. Measures data suitability; lies between 0 and 1 and should be above 0.5
kmo_all, kmo_model = calculate_kmo(df_standard)  # author's recorded result: kmo_model = 0.85
'''Note: looks good '''

#--------------------------------------------
# Determine number of factors
#--------------------------------------------
'''We use multiple selection criteria:
    1) Scree plot (elbow plot)
    2) Kaiser-Guttman rule
    3) Parallel analysis
    '''

# Get factor estimates