import statsmodels.discrete.discrete_model as sm_mod import statsmodels.graphics.gofplots as plots import matplotlib.pyplot as plt import statsmodels.tools.tools as smtools import sklearn.metrics as skm from sklearn.model_selection import train_test_split from factor_analyzer import FactorAnalyzer from factor_analyzer import (ConfirmatoryFactorAnalyzer, ModelSpecificationParser) from factor_analyzer.utils import (corr, impute_values, partial_correlations, smc) data1 = pd.read_csv("https://donatello-telesca.squarespace.com/s/Exposure-t4yx.csv") # Perform Factor Analysis fa = FactorAnalyzer() # fa.set_params(n_factors=6,rotation=None) fa.set_params(n_factors=6,rotation='varimax') fa.fit(data1) # Check factors factor_loadings = fa.loadings_ eigen_values, vectors = fa.get_eigenvalues() communalities = fa.get_communalities() # Create scree plot # plt.scatter(range(1,29),eigen_values) # plt.plot(range(1,29),eigen_values) # plt.title('Scree Plot') # plt.xlabel('Factors') # plt.ylabel('Eigenvalue') # plt.grid() # plt.show() def clump_factor_vars(factor_loadings,factor_num):
# NOTE(review): `df` is not created in this section; it is presumably loaded
# earlier in the file -- confirm before running this part on its own.

# Bartlett's test is not among the names imported at the top of the file,
# so it is brought into scope here.
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity

print(df.columns)

# Drop the three listed columns, then discard every row that still
# contains a missing value.
df.drop(['gender', 'education', 'age'], axis=1, inplace=True)
df.dropna(inplace=True)

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() only appended a stray "None".
df.info()
print(df.head())

# Before you perform factor analysis, you need to evaluate the "factorability"
# of our dataset.
chi_square_value, p_value = calculate_bartlett_sphericity(df)
print(chi_square_value, p_value)

# Create factor analysis object and perform factor analysis
# NOTE(review): n_factors=25 presumably equals the number of columns left in
# df after the drop above -- confirm.
fa = FactorAnalyzer()
fa.set_params(n_factors=25, rotation=None)
fa.fit(df)

# Check Eigenvalues
# get_eigenvalues() returns (original eigenvalues, common-factor eigenvalues);
# only the first array is used below.
ev, v = fa.get_eigenvalues()
print(ev)
# Here, you can see only for 6-factors eigenvalues are greater than one. It
# means we need to choose only 6 factors (or unobserved variables).

# Create scree plot using matplotlib: one point per column of df, numbered
# from 1.
factor_index = range(1, df.shape[1] + 1)
plt.scatter(factor_index, ev)
plt.plot(factor_index, ev)
plt.title('Scree Plot')
plt.xlabel('Factors')
plt.ylabel('Eigenvalue')
plt.grid()
plt.show()