def change_cwd_data(self):
    """Switch the process working directory to this object's data folder.

    The target directory is ``DATA_PATH`` formatted with the instance's
    dimension and reference distribution.

    Returns:
        The working directory that was active before the switch, so the
        caller can change back to it afterwards.
    """
    # Remember where we were before moving, so the caller can restore it.
    previous_dir = os.getcwd()
    data_dir = DATA_PATH.format(
        dim=self.__dim,
        reference=self.__reference_distribution,
    )
    os.chdir(data_dir)
    return previous_dir
from Constants.Constants import SIZE_SET, DIMENSION_SET, SEED from Constants.Expanded_Constants import REFERENCE_LIST from Constants.Storage_Constants import DATA_PATH n_components = [] explained_variance_ratio = [] for dim in DIMENSION_SET: print('Dimension:', dim) size_n_components = [] size_explained_var = [] for size in SIZE_SET: dist_n_components = [] dist_explained_var = [] print('\t' + str(size)) for reference in REFERENCE_LIST: os.chdir(DATA_PATH.format(dim=dim, reference=reference)) size_files = glob.glob('*Data Set ' + str(size) + '.parquet_' + reference + '_gz') feature_columns = ['U' + str(i) for i in range(size + 1) ] + ['V' + str(i) for i in range(size + 1)] size_df = pd.concat(map(lambda x: pd.read_parquet(x), size_files), ignore_index=True)[feature_columns] for percent in [0.85, 0.90, 0.95, 0.99, 0.999]: print('\t\t' + reference + '\t' + str(percent), end=' \t') pca = PCA(n_components=percent, random_state=SEED) pca.fit(size_df) np_values = pca.transform(size_df) print(str(pca.n_components_), flush=True) n_components.append(pca.n_components) dist_n_components.append(pca.n_components) print('\t\t\tExplained Variance:',
import glob
import os

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from Constants.Constants import SIZE_SET
from Constants.Expanded_Constants import REFERENCE_LIST
from Constants.Storage_Constants import DATA_PATH

# Scatter-plot V against U for every dimension-1 data set, producing one
# figure per (size, reference) pair with one series per parquet file.
# NOTE(review): the original source was collapsed onto a single line; the
# nesting below (legend/labels/title once per reference, after all files
# are plotted) is reconstructed from the statement order -- confirm.
for size in SIZE_SET:
    print(1, size, 'Complete')

    for reference in REFERENCE_LIST:
        print('\t' + reference)

        # The glob pattern is relative, so move into the data directory of
        # this reference first. The working directory is not restored.
        os.chdir(DATA_PATH.format(dim=1, reference=reference))
        matching_files = glob.glob(f'*Set {size}.parquet_{reference}_gz')

        # Start from a clean slate so series from the previous reference
        # do not leak into this figure.
        plt.close('all')

        for file in matching_files:
            # The label is the file name up to its first space
            # (presumably the distribution name -- verify against the
            # naming scheme of the data files).
            series_label = file.split(' ', 1)[0]

            frame = pd.read_parquet(file).astype(dtype=np.float32)
            print(f'\t\t{file}')

            plt.plot(
                frame['U'].to_numpy(),
                frame['V'].to_numpy(),
                '.',
                label=series_label,
            )

        plt.legend()
        plt.xlabel('U')
        plt.ylabel('V')
        plt.title(f'Dim 1 Size {size} {reference}')