def calculate_py_output(test_name, factors, method, rotation, top_dir=None):
    """
    Run `FactorAnalyzer()` on one test data set and collect its outputs.

    The data set is read from ``<top_dir>/<test_name>.csv``, analyzed with
    the requested number of factors, method and rotation, and the results
    are gathered into a single dictionary.

    Parameters
    ----------
    test_name : str
        The name of the test; also the stem of the CSV file to read.
    factors : int
        The number of factors.
    method : str
        The value handed to ``analyze()`` as ``method``.
        'uls' is passed on as 'minres'; anything else is passed unchanged.
    rotation : str
        The type of rotation. The string 'none' is converted to ``None``
        before it is handed to ``analyze()``.
    top_dir : str, optional
        The top directory for test data.
        Defaults to `DATA_DIR`.

    Returns
    -------
    output : dict
        A dictionary with the keys 'value', 'evalues', 'structure',
        'loading', 'uniquenesses', 'communalities' and 'scores'.
    """
    data_dir = DATA_DIR if top_dir is None else top_dir
    csv_path = join(data_dir, test_name + '.csv')
    data = pd.read_csv(csv_path)

    # The test scenarios spell "no rotation" as the string 'none'.
    if rotation == 'none':
        rotation = None

    # Translate scenario method names to the names `analyze()` is given.
    method_aliases = {'uls': 'minres'}
    method = method_aliases.get(method, method)

    fa = FactorAnalyzer()
    fa.analyze(data, factors, method=method, rotation=rotation)

    evalues, values = fa.get_eigenvalues()

    output = {}
    output['value'] = values
    output['evalues'] = evalues
    output['structure'] = fa.structure
    output['loading'] = fa.loadings
    output['uniquenesses'] = fa.get_uniqueness()
    output['communalities'] = fa.get_communalities()
    output['scores'] = fa.get_scores(data)
    return output
def run(self, dfx, n_factors=3):
    """Run an unrotated factor analysis on the numeric columns of ``dfx``.

    Parameters
    ----------
    dfx : pandas.DataFrame
        Input data. It is split by ``ParseDFtypes`` into two column lists;
        only the first list (treated here as the numeric columns) is
        analyzed.
    n_factors : int, optional
        Number of factors to extract. Also stored on ``self.n_factors``.

    Returns
    -------
    dict
        Always contains the keys 'result', 'msg' and 'factor':

        * 'result' -- loadings joined with communalities, or an empty
          DataFrame when there is nothing numeric to analyze.
        * 'msg' -- dict that may hold an 'error' or a 'warning' text.
        * 'factor' -- factor scores, or an empty DataFrame on error.
    """
    self.n_factors = n_factors
    msg = {}

    # NOTE(review): presumably returns (numeric column names,
    # non-numeric column names) as lists -- confirm against ParseDFtypes.
    x_numer_cols, x_cate_cols = ParseDFtypes(dfx)

    if not x_numer_cols:
        error_text = 'All input dfx are no numeric columns, Please check your input dfx data!'
        logging.error(error_text)
        msg['error'] = error_text
        # Return the same keys as the success path so callers can read
        # 'factor' without a KeyError.
        return {'result': pd.DataFrame(), 'msg': msg, 'factor': pd.DataFrame()}

    if x_cate_cols:
        warning_text = 'input dfx has non-numeric columns: %s, will ignore these columns!' % x_cate_cols
        logging.warning(warning_text)
        msg['warning'] = warning_text

    dfu = dfx[x_numer_cols]
    fa = FactorAnalyzer()
    fa.analyze(dfu, n_factors, rotation=None)

    loadings = fa.loadings
    communalities = fa.get_communalities()
    scores = fa.get_scores(dfu)

    # Column labels are user-facing Chinese text:
    # "factor N loading coefficient", "communality" and "factor N".
    factor_numbers = range(1, n_factors + 1)
    loadings.columns = ['因子%s荷载系数' % i for i in factor_numbers]
    communalities.columns = ['共同度']
    scores.columns = ['因子%s' % i for i in factor_numbers]

    res = loadings.join(communalities)
    return {'result': res, 'msg': msg, 'factor': scores}
# Factor loadings
fa.loadings


# In[22]:


# Communalities
fa.get_communalities()


# In[23]:


# Output of get_factor_variance()
fa.get_factor_variance()


# In[26]:


# Factor scores computed from df
Fac_score = fa.get_scores(df)


# In[27]:


# Peek at the first rows of the factor scores
print(Fac_score.head())


# # Cluster Analysis using factor scores

# In[28]:


from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples
from sklearn.metrics import silhouette_score


# In[82]:


# Candidate cluster counts: 5 and 10, every value from 15 to 20,
# then 25 and 30.
n_clusters = [5, 10] + list(range(15, 21)) + [25, 30]