# NOTE(review): this physical line has lost its original line breaks. As
# written, only the first assignment is executable; everything after the first
# `#` (the linear_sum_assignment label matching, the accuracy `return`, the
# digits loading/scaling, the 2-component PCA, the scatter plot and the banner
# print) is parsed as one long comment and never runs.
# NOTE(review): the leading statements read y_true / y_pred and end in a
# `return`, so they look like the tail of a function whose header is not
# visible here -- TODO confirm, then restore the line breaks and indentation
# together with that header.
# NOTE(review): in the embedded script, PCA is fitted twice (`pca` via fit(),
# `reduced_data` via a second fit_transform()); `pca` and `reduced_data1` are
# not used again on this line -- presumably used further down; verify.
contingency_matrix = metrics.cluster.contingency_matrix(y_true, y_pred) # Find optimal one-to-one mapping between cluster labels and true labels row_ind, col_ind = linear_sum_assignment(-contingency_matrix) # Return cluster accuracy return contingency_matrix[row_ind, col_ind].sum() / np.sum(contingency_matrix) digits = load_digits() data = scale(digits.data) pca = PCA(n_components=2).fit(data) reduced_data = PCA(n_components=2).fit_transform(data) reduced_data1=reduced_data.copy() plt.scatter(reduced_data[:,0],reduced_data[:,1]) plt.xlabel("X-Component") plt.ylabel("Y-Component") plt.show() y_true = digits.target print("\n\n************************Agglomerative *********************")
#----------------------------------------------------------------------------------------------#
# Data outside
#
# CSV layout read below (header=None, so no row is consumed as column names):
#   - row 0, columns 1..   : integer label of each sample
#   - column 0, rows 1..   : integer id of each feature
#   - remaining cells      : values, one row per feature and one column per sample
Files = pd.read_csv(FileName, header=None)
SampleNumber = Files.columns.size - 1                                   # Sample Number
FeatureNumber = len(Files) - 1                                          # Feature Number
Labels = np.array(Files.values[0, 1:SampleNumber+1], dtype='int')       # Labels
Features = np.array(Files.values[1:FeatureNumber+1, 0], dtype='int')    # Features
Datas = Files.iloc[1:FeatureNumber+1, 1:SampleNumber+1]                 # Datas
# Transpose to sample * feature and force a float matrix so that a mixed/object
# frame cannot leak a non-numeric dtype into the statistics below.
Datas = np.array(Datas.T, dtype=float)                                  # sample * feature
print('Sample Number : ', SampleNumber)
print('Feature Number : ', FeatureNumber)

#----------------------------------------------------------------------------------------------#
# PCA Model
#
# Parameters are passed to the constructor rather than assigned afterwards;
# the resulting estimator carries the same settings.
PCAmodel = PCA(
    n_components=2,       # Default : None(all) , components number
    copy=False,           # Default : True, copy the origin data
    whiten=False,         # Default : False, let features with same var
    random_state=0,       # Default : None
    svd_solver='auto',    # Default : auto
)

#----------------------------------------------------------------------------------------------#
# PCA Processing
#
# The scaler is not fitted with fit(): its mean_/scale_ are set by hand so that
# the sample standard deviation (ddof=1) is used for standardisation.
Scaler = skl.preprocessing.StandardScaler()
Scaler.mean_ = np.mean(Datas, axis=0)
FeatureStd = np.std(Datas, axis=0, ddof=1)
# A constant feature has std 0, and a single sample gives NaN with ddof=1;
# dividing by either would inject inf/NaN into the PCA input. Fall back to a
# scale of 1.0 for those columns so they are only centred.
Scaler.scale_ = np.where(FeatureStd > 0, FeatureStd, 1.0)
ScaledDatas = Scaler.transform(Datas)
# copy=False lets PCA work on ScaledDatas in place, so only fit_transform()
# (not fit() followed by transform()) is used here.
NewDatas = PCAmodel.fit_transform(ScaledDatas)
print('-------------------------------')
print('Pragma : PCA')
print('Reserved Comp Number : ', PCAmodel.n_components_)
print('Reserved Comp Ratio : ', PCAmodel.explained_variance_ratio_)   # Larger is better
print('Reserved Comp Var : ', PCAmodel.explained_variance_)           # Larger is better