from my_plot import save_plot_os, save_plot_comparison
from util import get_file, extract_columns

# Directories
# macos_out = get_file("macos", "matlabOutput.csv")
matlab_ubuntu = get_file("matlab/output/ubuntu", "matlabOutput.csv")
matlab_windows = get_file("matlab/output/windows", "matlabOutput.csv")
cpp_ubuntu_native_32 = get_file("cpp/output/ubuntu", "cppOutput-native-32.csv")
cpp_ubuntu_native_64 = get_file("cpp/output/ubuntu", "cppOutput-native-64.csv")
cpp_ubuntu_mkl = get_file("cpp/output/ubuntu", "cppOutput-mkl.csv")
cpp_windows_mkl = get_file("cpp/output/windows", "cppOutput-mkl.csv")
cpp_windows_native_64 = get_file("cpp/output/windows", "cppOutput-native-64.csv")

# columns_macos = extract_columns(macos_out)
columns_matlab_ubuntu = extract_columns(matlab_ubuntu)
columns_matlab_windows = extract_columns(matlab_windows)
columns_cpp_ubuntu_native_32 = extract_columns(cpp_ubuntu_native_32)
columns_cpp_ubuntu_native_64 = extract_columns(cpp_ubuntu_native_64)
columns_cpp_ubuntu_mkl = extract_columns(cpp_ubuntu_mkl)
columns_cpp_windows_mkl = extract_columns(cpp_windows_mkl)
columns_cpp_windows_native_64 = extract_columns(cpp_windows_native_64)

## Single plot
### NO
# save_plot_os(columns_macos, "rows", "Matrix Size", "results/macos_OnSize.pdf")
# save_plot_os(columns_macos, "nonZeros", "Non Zeros", "results/macos_OnNonZeros.pdf")
### NO

save_plot_os(columns_matlab_ubuntu, "rows", "Matrix Size", "results/single/matlab_ubuntu_OnSize.pdf")
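# For context, a minimal sketch of what the util helpers are assumed to do.
# These are hypothetical stand-ins (note the "_sketch" suffix), not the real
# util module: get_file is assumed to resolve a CSV path, and extract_columns
# is assumed to parse the CSV into a dict of NumPy arrays keyed by header.
import csv
import os

import numpy as np


def get_file_sketch(directory, name):
    # Assumed behaviour: join the output directory and the CSV file name.
    return os.path.join(directory, name)


def extract_columns_sketch(path):
    # Assumed behaviour: one NumPy array per column; values are assumed numeric.
    with open(path, newline="") as f:
        rows = list(csv.DictReader(f))
    return {key: np.array([float(row[key]) for row in rows]) for key in rows[0]}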
def __init__(self, p_type='classification', algorithms=None, hyperparameters=None, verbose=False,
             n_cores=mp.cpu_count(), runtime_limit=512, dataset_ratio_threshold=100,
             selection_method='min_variance', scalarization='D',
             error_matrix=None, runtime_matrix=None, new_row=None,
             build_ensemble=True, ensemble_method='greedy',
             runtime_predictor='KNeighborsRegressor', solver='scipy', **stacking_hyperparams):
    # TODO: check if arguments to constructor are valid; set to defaults if not specified
    assert selection_method in {'qr', 'min_variance', 'random'}, \
        "The method to select entries to sample must be either qr (QR decomposition), " \
        "min_variance (minimize variance with time constraints), or random " \
        "(time-constrained random selection, for testing purposes)."

    with open(os.path.join(DEFAULTS, p_type + '.json')) as file:
        defaults = json.load(file)

    # attributes of ML problem
    self.p_type = p_type.lower()
    self.algorithms = algorithms or defaults['algorithms']
    self.hyperparameters = hyperparameters or defaults['hyperparameters']
    self.verbose = verbose

    # computational considerations
    self.n_cores = n_cores
    self.runtime_limit = runtime_limit

    # sample column selection
    self.selection_method = selection_method
    self.scalarization = scalarization

    # error matrix attributes
    # TODO: determine whether to generate new error matrix or use default/subset of default
    self.error_matrix = util.extract_columns(
        ERROR_MATRIX, self.algorithms, self.hyperparameters) if error_matrix is None else error_matrix
    self.runtime_matrix = util.extract_columns(
        RUNTIME_MATRIX, self.algorithms, self.hyperparameters) if runtime_matrix is None else runtime_matrix
    assert util.check_dataframes(self.error_matrix, self.runtime_matrix)
    self.column_headings = np.array(
        [eval(heading) for heading in list(self.error_matrix)])
    self.X, self.Y, _ = linalg.pca(self.error_matrix.values,
                                   rank=min(self.error_matrix.shape) - 1)

    # sampled & fitted models
    # explicit None check: the truth value of a non-empty NumPy row is ambiguous
    self.new_row = new_row if new_row is not None else np.zeros((1, self.error_matrix.shape[1]))
    self.sampled_indices = set()
    self.sampled_models = [None] * self.error_matrix.shape[1]
    self.fitted_indices = set()
    self.fitted_models = [None] * self.error_matrix.shape[1]

    # ensemble attributes
    self.build_ensemble = build_ensemble
    self.ensemble_method = ensemble_method
    self.stacking_hyperparams = stacking_hyperparams
    if self.build_ensemble:
        self.ensemble = Ensemble(self.p_type, self.ensemble_method, self.stacking_hyperparams)
    else:
        self.ensemble = Model_collection(self.p_type)

    # runtime predictor
    self.runtime_predictor = runtime_predictor
    self.dataset_ratio_threshold = dataset_ratio_threshold
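# A minimal usage sketch, kept as a comment because only the constructor is
# shown in this listing. The class name `AutoLearner` and the fit/predict
# calls are assumptions about the surrounding class, and x_train, y_train,
# x_test stand in for user-supplied arrays:
#
#   m = AutoLearner(p_type='classification', runtime_limit=60, n_cores=4,
#                   selection_method='min_variance', build_ensemble=True)
#   m.fit(x_train, y_train)
#   y_pred = m.predict(x_test)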
from util import get_file, extract_columns, reshape_columns
from visualisation import comparison_result
import numpy as np

input = get_file("results", "dct2_comparison.2.csv")
columns = extract_columns(input)

# Timings for the last matrix size are taken from the tail of the raw columns
# and averaged separately from the reshaped block below.
n = columns["n"][-1:]
my = columns["my"][-4:]
orig = columns["orig"][-4:]

reshaped_columns = reshape_columns(columns)

# Average the timings of each matrix size over its iterations.
my_mean = []
orig_mean = []
for i in range(len(reshaped_columns["iteration"])):
    my_mean.append(np.mean(reshaped_columns["my"][i, :]))
    orig_mean.append(np.mean(reshaped_columns["orig"][i, :]))

# Append the last size and its mean timings, then plot both implementations.
size = np.append(reshaped_columns["n"][:, 0], np.array([n]))
my_mean.append(np.mean(my))
orig_mean.append(np.mean(orig))

comparison_result(size, [my_mean, orig_mean], ["Our dctn", "SciPy dctn"],
                  "DCT2", "results/presentation.pdf")
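# A vectorized alternative to the per-size loop above, as a sketch: it assumes
# reshaped_columns["my"] and reshaped_columns["orig"] are 2-D arrays of shape
# (num_sizes, num_iterations), which is what the row-wise indexing implies.
def row_means(matrix):
    # Mean of every row of a 2-D array, returned as a plain Python list.
    return list(np.mean(matrix, axis=1))

# e.g. my_mean = row_means(reshaped_columns["my"])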