def calculate_py_output(test_name, factors, method, rotation, top_dir=None):
    """
    Run a factor analysis for one test scenario and collect its outputs.

    Parameters
    ----------
    test_name : str
        The name of the test; ``<test_name>.csv`` is read from `top_dir`.
    factors : int
        The number of factors to extract.
    method : str
        The fitting method ('uls' is normalized to 'minres').
    rotation : str
        The type of rotation ('none' is normalized to ``None``).
    top_dir : str, optional
        The top directory for test data.
        Defaults to ``DATA_DIR``.

    Returns
    -------
    output : dict
        A dictionary containing the outputs for all `OUTPUT_TYPES`.
    """
    if top_dir is None:
        top_dir = DATA_DIR

    csv_path = join(top_dir, test_name + '.csv')
    frame = pd.read_csv(csv_path)

    # Normalize the string spellings used by the R fixtures to the
    # spellings the Python implementation expects.
    if rotation == 'none':
        rotation = None
    if method == 'uls':
        method = 'minres'

    analyzer = FactorAnalyzer()
    analyzer.analyze(frame, factors, method=method, rotation=rotation)
    evalues, values = analyzer.get_eigenvalues()

    return {'value': values,
            'evalues': evalues,
            'structure': analyzer.structure,
            'loading': analyzer.loadings,
            'uniquenesses': analyzer.get_uniqueness(),
            'communalities': analyzer.get_communalities(),
            'scores': analyzer.get_scores(frame)}
# In[20]: from factor_analyzer import FactorAnalyzer fa = FactorAnalyzer() # In[28]: fa.analyze(train, 3, rotation=None) # In[29]: fa.loadings # In[30]: fa.get_uniqueness() # we have to select which feature have the heighest uniqueness value that feature is the first importent variable # ## Principle component Analysis # In[31]: from sklearn.decomposition import PCA pca = PCA(n_components=4) principalComponents = pca.fit_transform(df) principalDf = pd.DataFrame(data=principalComponents, columns=[ 'principal component 1', 'principal component 2', 'principal component 3', 'principal component 4' ])
# (Continuation of the `seiseki_a = np.array([...` score-matrix literal
# begun above this chunk — comment lines are legal inside the brackets.)
[57, 46, 54, 46, 42],
[38, 42, 41, 36, 41],
[43, 47, 41, 53, 44],
[45, 51, 53, 46, 53],
[49, 56, 54, 61, 51],
[35, 38, 57, 65, 57]])

# Standardize the raw scores; `subject` (column labels) and `scale` come
# from earlier in the file.
seiseki_in = pd.DataFrame(seiseki_a, columns=subject)
seiseki = pd.DataFrame(scale(seiseki_in), columns=seiseki_in.columns.values)

# Two-factor model with varimax rotation; alternative rotations kept for
# reference.
fa = FactorAnalyzer()
fa.analyze(seiseki, 2, rotation="varimax")
#fa.analyze(seiseki, 2, rotation="promax")
#fa.analyze(seiseki, 2, rotation=None)

print('相関行列\n', seiseki.corr(method='pearson'))
print()
print('因子負荷量', fa.loadings.round(4))  # loadings
print()
print('独自性', fa.get_uniqueness().round(4))  # uniqueness
print()
print('因子分散', fa.get_factor_variance().round(4))
print()
##################
# Contribution ratio (寄与率): per-factor sum of squared loadings divided by
# the number of variables, with the cumulative ratio appended as a second row.
kiyo = np.array([0, 0])
for i in range(len(fa.loadings)):
    u = np.array(fa.loadings.iloc[i])
    kiyo = kiyo + u * u
kiyo = pd.DataFrame(kiyo / len(fa.loadings),
                    index=fa.loadings.columns.values).T
# NOTE(review): DataFrame.append was removed in pandas 2.0 — confirm this
# file targets pandas < 2.
kiyo = kiyo.append(pd.DataFrame(np.cumsum(kiyo, axis=1)),
                   ignore_index=True).rename({
    0: '寄与率',