def rotated_scaled_fa(n_comp, arr_pq, varimax_=True):
    """Run a factor analysis on a matrix, optionally followed by varimax.

    IN:
        - n_comp, int, number of latent dimensions
        - arr_pq, arr, shape: samples (persons) x features (questions)
        - varimax_, bool, whether to perform a varimax rotation (default=True)
    OUT:
        - arr_qd, arr, shape: features x latent dimensions
        - arr_pd, arr, shape: samples x latent dimensions
    """
    model = FactorAnalysis(n_comp)
    model.fit(arr_pq)
    scores = model.transform(arr_pq)
    loadings = model.components_.T

    # Without rotation the raw loadings and scores are handed back untouched.
    if not (varimax_ == True):
        return loadings, scores

    # Only the rotation matrix is needed; the loadings are rotated explicitly.
    _, rotation = fr.rotate_factors(loadings, 'varimax')
    rotated_loadings = np.dot(loadings, rotation)

    # The scores receive the inverse rotation, applied in (dimension x sample)
    # orientation and then flipped back to samples x dimensions.
    inverse_rotation = np.linalg.inv(rotation)
    rotated_scores = np.transpose(np.dot(inverse_rotation, np.transpose(scores)))
    return rotated_loadings, rotated_scores
def compute_factors(mri, n_components):
    """Extract varimax-rotated PCA factors and their loadings from ``mri``.

    ``mri`` is scaled by its standard deviation along ``'case'``, a PCA is
    fitted, the first ``n_components`` components are varimax-rotated, and
    the rotated reconstruction is checked against the centred, scaled data.

    Args:
        mri: labelled array exposing ``.std('case')``, ``.values`` and the
            coordinates ``'case'`` and ``'cad_feature'`` (presumably an
            ``xarray.DataArray`` of cases x features -- TODO confirm).
        n_components: number of leading principal components to rotate.

    Returns:
        Tuple ``(factors, loadings)`` of ``xr.DataArray`` objects with dims
        ``['case', 'factor']`` and ``['factor', 'cad_feature']``; factors
        are numbered from 1 as ``int8``.

    Raises:
        AssertionError: if the relative reconstruction error is not
            below 0.05.
    """
    scaled = mri / mri.std('case')
    mri_a = scaled.values

    pca = sklearn.decomposition.PCA()
    pca.fit(mri_a)

    leading_components = pca.components_[:n_components, :].T
    rotated_components, rotation = factor_rotation.rotate_factors(
        leading_components,
        'varimax',
    )
    rotated_components = rotated_components.T
    scores = pca.transform(mri_a)
    factors_a = scores[:, :n_components] @ rotation

    # Check results
    centred = (scaled - scaled.mean('case')).values
    reconstructed = factors_a @ rotated_components
    err = np.mean((centred - reconstructed)**2) / np.mean(centred**2)
    assert err < 0.05, "High reconstruction error {}".format(err)

    # Add metadata back
    factor_coords = {
        'case': mri.coords['case'],
        'factor': np.array(range(1, n_components+1), 'int8'),
    }
    loading_coords = {
        'factor': np.array(range(1, n_components+1), 'int8'),
        'cad_feature': mri.coords['cad_feature'],
    }
    factors = xr.DataArray(factors_a, dims=['case', 'factor'], coords=factor_coords)
    loadings = xr.DataArray(
        rotated_components,
        dims=['factor', 'cad_feature'],
        coords=loading_coords,
    )
    return factors, loadings
def quartimax(unrotated):
    """Apply the 'quartimax' rotation to ``unrotated``.

    Prints a progress message, then delegates to ``fr.rotate_factors``
    requesting ``torch.float64`` on the ``"cuda"`` device, and returns
    that call's result unchanged.
    """
    print('starting factor quartimax rotation...')
    torch_options = {
        'dtype': torch.float64,
        'device': torch.device("cuda"),
    }
    return fr.rotate_factors(unrotated, 'quartimax', **torch_options)
def parsimony(unrotated):
    """Apply the 'parsimony' rotation to ``unrotated``.

    Prints a progress message, then delegates to ``fr.rotate_factors``
    requesting ``torch.float64`` on the ``"cuda"`` device, and returns
    that call's result unchanged.
    """
    print('starting factor parsimony rotation...')
    torch_options = {
        'dtype': torch.float64,
        'device': torch.device("cuda"),
    }
    return fr.rotate_factors(unrotated, 'parsimony', **torch_options)
def quartimax(unrotated):
    """Apply the 'quartimax_CF' rotation to ``unrotated``.

    Prints a progress message and returns the result of
    ``fr.rotate_factors`` unchanged.
    """
    print('starting factor quartimax rotation...')
    method = 'quartimax_CF'
    rotation_result = fr.rotate_factors(unrotated, method)
    return rotation_result
[ 0.47105995, 0.85757736], [-0.05816789, 0.31683709], [-1.3511985 , -0.11610641], [ 1.80523345, -0.14549883]]) M_target = np.array([[ 1.15424697, 0.2724154 ], [ 0.7893224 , 0.66576866], [-0.71227541, 0.55254571], [-0.84737515, 0.41528169], [-0.12133101, -1.28176304], [-0.60248373, -0.5405648 ], [ 0.45355659, 0.54495004], [ 0.62044144, -1.83902599]]) #analytic method T_analytic= fr.target_rotation(M,M_target) #numerical method using a gradient projection algorithm (GPA) L,T = fr.rotate_factors(M,'target',M_target,'orthogonal') print(np.allclose(T,T_analytic,atol=1e-4)) #numerical method using a gradient projection algorithm (GPA) with lower level functions #define objective function vgQ = lambda L=None, A=None, T=None: fr._gpa_rotation.vgQ_target(M_target,L=L,A=A,T=T) #define starting point T_start = T_analytic #solve L, phi, T, table = fr._gpa_rotation.GPA(M, T=T_analytic, vgQ=vgQ, rotation_method='orthogonal') #comparison if np.allclose(T,T_analytic): print(True) else: it_optim = vgQ(A=M, T=T)[0] an_optim = vgQ(A=M, T=T_analytic)[0] #print('Iterative algorithm optim: %f' % it_optim)
def parsimax(unrotated):
    """Apply the 'parsimax' rotation to ``unrotated``.

    Prints a progress message and returns the result of
    ``fr.rotate_factors`` unchanged.
    """
    print('starting factor parsimax rotation...')
    method = 'parsimax'
    rotation_result = fr.rotate_factors(unrotated, method)
    return rotation_result
def varimax(unrotated):
    """Apply the 'varimax_CF' rotation to ``unrotated``.

    Prints a progress message and returns the result of
    ``fr.rotate_factors`` unchanged.
    """
    print('starting varimax rotation...')
    method = 'varimax_CF'
    rotation_result = fr.rotate_factors(unrotated, method)
    return rotation_result
def PCA(inde, countr):
    """Cluster the variables of one intermediate CSV via rotated PCA loadings.

    Reads ``Intermediate_Output/<inde>/<countr>.csv`` (no header), drops its
    last column, filters out all-zero entries, standardises the remainder and
    eigen-decomposes its correlation matrix.  Eigenvectors are sign-flipped
    so each column sum is non-negative and scaled by the square root of
    their eigenvalue; only those with ``|eigenvalue| > 1`` are retained.

    For every ``cluster`` from 2 up to the number of retained eigenvectors,
    the first ``cluster`` vectors are varimax-rotated and two CSV files are
    written:

    * ``Clusters/RCM/<inde>/<countr>/<cluster>.csv`` -- one line per
      variable: rotated loadings rounded to 5 decimals, then the 1-based
      factor assigned to that variable (0 if no loading exceeds 0.35).
    * ``Clusters/All_Cluster/<inde>/<countr>/<cluster>.csv`` -- for the
      original columns 1..56, the assigned factor, or 99 when the column
      sums to zero in the unfiltered data.

    Args:
        inde: path component identifying the input/output sub-directory.
        countr: path component identifying the input file / output folder.

    Returns:
        int: number of retained eigenvectors plus one.
    """
    pd_A = pd.read_csv(
        '/home/striker/Factor-Analysis/Output/Intermediate_Output/' +
        str(inde) + '/' + str(countr) + '.csv',
        header=None)
    pd_A = pd_A.iloc[:, :-1]
    # Unfiltered copy, used later to decide which original columns were empty.
    temp_pd_A = pd_A
    # NOTE(review): the row filter uses the per-column mask (axis=0), so it
    # only aligns when row and column labels coincide (square input) --
    # confirm this is intended rather than ``any(axis=1)``.
    pd_A = pd_A.loc[(pd_A != 0).any(axis=0), :]
    pd_A = pd_A.loc[:, (pd_A != 0).any(axis=0)]

    X_std = StandardScaler().fit_transform(pd_A)
    cor_mat = np.corrcoef(X_std.T)

    # One decomposition call keeps each eigenvalue paired with its own
    # eigenvector instead of relying on two calls returning the same order.
    eigenvalues, _eigenvectors = np.linalg.eig(cor_mat)
    eigenvectors = _eigenvectors * np.sign(np.sum(_eigenvectors, 0))
    new_eig_vectors = eigenvectors * pow(eigenvalues, .5)

    eig_pairs = sorted(
        zip(np.abs(eigenvalues), new_eig_vectors.T),
        key=lambda pair: pair[0],
        reverse=True)
    retained = [vector.tolist() for value, vector in eig_pairs if value > 1]
    # list(...) is required on Python 3, where zip() is a lazy iterator that
    # np.array would otherwise wrap as a 0-d object array.
    new_eig_pairs = np.array(list(zip(*retained)))

    n_retained = len(new_eig_pairs[0])
    for cluster in range(2, n_retained + 1):
        clusterred_eig_pairs = new_eig_pairs[:, :cluster]
        L, T = fr.rotate_factors(clusterred_eig_pairs, 'varimax')

        file_name = '/home/striker/Factor-Analysis/Output/Clusters/RCM/' + str(
            inde) + '/' + str(countr) + '/' + str(cluster) + '.csv'
        file_all_cluster = '/home/striker/Factor-Analysis/Output/Clusters/All_Cluster/' + str(
            inde) + '/' + str(countr) + '/' + str(cluster) + '.csv'

        # Context managers guarantee both files are flushed and closed, even
        # if an error occurs part-way through writing.
        with open(file_name, 'w') as rotated_matrix, \
                open(file_all_cluster, 'w') as clusters_num:
            clusters = []
            for row in L:
                first_qtr = 0
                second_qtr = 0
                third_qtr = 0
                # Within each loading band the last matching column wins.
                for col_i, loading in enumerate(row):
                    value = abs(loading)
                    if value > 0.65:
                        first_qtr = col_i + 1
                    elif 0.5 < value < 0.65:
                        second_qtr = col_i + 1
                    elif 0.35 < value < 0.5:
                        third_qtr = col_i + 1
                # Fall back to the weaker bands only when no strong loading
                # was found.
                if first_qtr == 0 and second_qtr != 0:
                    first_qtr = second_qtr
                elif first_qtr == 0 and third_qtr != 0:
                    first_qtr = third_qtr
                rotated_matrix.write(
                    ''.join(str(round(loading, 5)) + ',' for loading in row) +
                    str(first_qtr) + '\n')
                clusters.append(str(first_qtr))

            # Map assignments back onto the original 56 columns; columns that
            # sum to zero in the unfiltered data get the sentinel cluster 99.
            cluster_index = 0
            for ind in range(0, 56):
                file_val = temp_pd_A[ind].sum()
                if float(file_val) == float(0.0):
                    clusters_num.write(str(ind + 1) + ',' + str(99) + '\n')
                else:
                    clusters_num.write(
                        str(ind + 1) + ',' + str(clusters[cluster_index]) + '\n')
                    cluster_index += 1

    return n_retained + 1
import numpy as np
import factor_rotation as fr

# Rotate M towards M_target with an orthogonal matrix T, once analytically
# and twice numerically, and report whether the rotation matrices agree.
M = np.array([
    [1.10061095, 0.47713676],
    [1.30095568, -0.16730989],
    [1.6787652, -1.234039],
    [0.42456929, -1.28744732],
    [0.47105995, 0.85757736],
    [-0.05816789, 0.31683709],
    [-1.3511985, -0.11610641],
    [1.80523345, -0.14549883],
])
M_target = np.array([
    [1.15424697, 0.2724154],
    [0.7893224, 0.66576866],
    [-0.71227541, 0.55254571],
    [-0.84737515, 0.41528169],
    [-0.12133101, -1.28176304],
    [-0.60248373, -0.5405648],
    [0.45355659, 0.54495004],
    [0.62044144, -1.83902599],
])

# Analytic method.
T_analytic = fr.target_rotation(M, M_target)

# Numerical method using a gradient projection algorithm (GPA).
L, T = fr.rotate_factors(M, 'target', M_target, 'orthogonal')
matches_analytic = np.allclose(T, T_analytic, atol=1e-4)
print(matches_analytic)


# Numerical method using GPA through the lower-level functions.
# Objective function: target criterion bound to M_target.
def vgQ(L=None, A=None, T=None):
    return fr._gpa_rotation.vgQ_target(M_target, L=L, A=A, T=T)


# Starting point: the analytic solution.
T_start = T_analytic

# Solve.
L, phi, T, table = fr._gpa_rotation.GPA(
    M,
    T=T_start,
    vgQ=vgQ,
    rotation_method='orthogonal',
)

# Comparison.
if np.allclose(T, T_analytic):
    print(True)
pca = PCA(n_components=2) X = data[features] X = X - np.mean(X, axis=0) pca.fit(X) print pca.explained_variance_ratio_, sum(pca.explained_variance_ratio_) V = pca.components_.T S = np.diag(np.sqrt(X.shape[0]*pca.explained_variance_)) loadings = np.dot(V, S)/sqrt(X.shape[0]) print loadings L, T = fr.rotate_factors(loadings.T,'varimax') print L.T ''' plt.scatter(pca.transform(data[features])[:,0], pca.transform(data[features])[:,1]) plt.show() res = data['cnt'].values - model.predict(data[features]) for f in features: plt.scatter(data[f].values, res) plt.show() scores = np.array(scores)