def _collect_result(test_function: str, N: int, noise_std: float, random: bool, gps: Tuple[str, ...], M: int):
    """Merge the per-fold CSV outputs of each GP into one results file per parameter.

    The store is located via ``store_path(test_function, N, noise_std, random, M)``
    and all output goes to its ``results`` sub-directory, which is created if
    missing. For every GP name in ``gps`` this produces

    * ``<gp>.True_Theta.csv`` from ``linear_transformation(store)``;
    * ``<gp>.<param>`` for ``Theta.csv``, ``S.csv``, ``S1.csv`` (read from
      ``fold.<k>/<gp>/sobol``), ``lengthscale.csv`` (read from
      ``fold.<k>/<gp>/kernel``) and ``e.csv``, ``f.csv``,
      ``log_likelihood.csv``, ``test_stats.csv`` (read from ``fold.<k>/<gp>``).

    Each merged file holds the rows of all ``K`` folds, tagged by a leading
    ``fold`` column, followed by their element-wise average over folds, tagged
    ``fold == 'mean'``.

    Args:
        test_function: Forwarded to ``store_path``.
        N: Forwarded to ``store_path``.
        noise_std: Forwarded to ``store_path``.
        random: Forwarded to ``store_path``.
        gps: Names of the GP sub-directories to collect from each fold.
        M: Forwarded to ``store_path``.
    """
    store = store_path(test_function, N, noise_std, random, M)
    destination = store / "results"
    destination.mkdir(mode=0o777, parents=True, exist_ok=True)
    for gp in gps:
        for sobol in (True, False):
            if sobol:
                # NOTE(review): constructing data.Frame(path, df) is presumably what
                # writes the CSV; the instance itself is not needed afterwards.
                data.Frame(destination / "{0}.{1}".format(gp, "True_Theta.csv"),
                           DataFrame(linear_transformation(store)))
                params = ("Theta.csv", "S.csv", "S1.csv")
            else:
                params = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv", "test_stats.csv")
            for param in params:
                per_fold = []
                avg = None
                for k in range(K):
                    fold_dir = (store / "fold.{0:d}".format(k)) / gp
                    if sobol:
                        source = fold_dir / "sobol"
                    elif param == "lengthscale.csv":
                        source = fold_dir / "kernel"
                    else:
                        source = fold_dir
                    result = data.Frame(source / param, **model.base.Model.CSV_PARAMETERS).df.copy(deep=True)
                    result.insert(0, "fold", full(result.shape[0], k), True)
                    per_fold.append(result)
                    if avg is None:
                        avg = result / K
                    else:
                        avg += result / K
                # Replace the whole column instead of writing through .loc: putting a
                # string into the (float) averaged fold column in place is an
                # incompatible-dtype set that newer pandas deprecates.
                avg["fold"] = "mean"
                results = concat(per_fold + [avg], axis=0, ignore_index=True, sort=False)
                data.Frame(destination / "{0}.{1}".format(gp, param), results)
def _test_stats(k: int, gp_path: Path) -> data.Frame:
    """Summarise the test predictions stored in ``gp_path / "__test__.csv"``.

    Reads the ``Y``, ``Predictive Mean`` and ``Predictive Std`` columns and
    builds a one-row table with the columns ``fold``, ``RMSE``,
    ``Prediction Std`` and ``Outliers``.

    Args:
        k: Fold number recorded in the ``fold`` column.
        gp_path: Directory containing ``__test__.csv``.

    Returns:
        The ``data.Frame`` constructed for ``gp_path / "test_stats.csv"`` from
        the one-row table.
    """
    predictions = data.Frame(gp_path / "__test__.csv").df.copy()
    observed = predictions['Y'].values
    predicted = predictions['Predictive Mean'].values
    predicted_std = predictions['Predictive Std'].values
    abs_error = abs(observed - predicted)
    # Non-zero exactly where the absolute error reaches two predictive standard deviations.
    beyond_two_std = floor_divide(abs_error, 2 * predicted_std)
    summary = {
        'fold': k,
        # NOTE(review): the division by 4 is carried over unchanged; its rationale is not visible here.
        'RMSE': sqrt(mean(abs_error ** 2)) / 4,
        'Prediction Std': mean(predicted_std),
        'Outliers': count_nonzero(beyond_two_std) / len(predicted_std),
    }
    stats_table = DataFrame(summary, index=[0])
    return data.Frame(gp_path / "test_stats.csv", stats_table)
def _collect_test_stats(M, N, function_name, random, noisy):
    """Compute the test statistics of the ``"ard"`` GP for every fold of a store.

    The store is located via ``store_dir(M, N, function_name, random, noisy)``.
    For each of the ``FOLDS`` folds, ``_test_stats`` is called on the fold's
    ``ard`` directory; it reads ``__test__.csv`` there and constructs the
    ``data.Frame`` for ``test_stats.csv`` in the same directory.

    Args:
        M: Forwarded to ``store_dir``.
        N: Forwarded to ``store_dir``.
        function_name: Forwarded to ``store_dir``.
        random: Forwarded to ``store_dir``.
        noisy: Forwarded to ``store_dir``.
    """
    source_store = store_dir(M, N, function_name, random, noisy)
    for k in range(FOLDS):
        fold = data.Fold(source_store, k)
        # _test_stats takes the GP directory (not a DataFrame), loads __test__.csv
        # itself and already returns the frame for test_stats.csv, so no extra
        # data.Frame wrapper is needed here.
        _test_stats(k, fold.dir / "ard")
def _collect_test_stats(test_function: str, N: int, noise_std: float, random: bool, gp: str, M: int = 5):
    """Compute the test statistics of one GP for every fold of a store.

    The store is located via ``store_path(test_function, N, noise_std, random, M)``.
    For each of the ``K`` folds, ``_test_stats`` is called on the fold's ``gp``
    directory; it reads ``__test__.csv`` there and constructs the
    ``data.Frame`` for ``test_stats.csv`` in the same directory.

    Args:
        test_function: Forwarded to ``store_path``.
        N: Forwarded to ``store_path``.
        noise_std: Forwarded to ``store_path``.
        random: Forwarded to ``store_path``.
        gp: Name of the GP sub-directory inside each fold directory.
        M: Forwarded to ``store_path``.
    """
    source_store = store_path(test_function, N, noise_std, random, M)
    for k in range(K):
        fold = data.Fold(source_store, k)
        # _test_stats takes the GP directory (not a DataFrame), loads __test__.csv
        # itself and already returns the frame for test_stats.csv, so no extra
        # data.Frame wrapper is needed here.
        _test_stats(k, fold.dir / gp)
def _collect_result(M: int, N: int, function_name: str, random: bool, noisy: bool):
    """Merge the per-fold CSV outputs of the three GPs into one file per parameter.

    The store is located via ``store_dir(M, N, function_name, random, noisy)``
    and output goes to ``BASE_PATH / "results" / <store name>`` (created if
    missing). File names are prefixed with ``NORMAL_CDF_DIR`` when ``noisy``
    is true and with ``NOISELESS_DIR`` otherwise.

    For each of ``"ard"``, ``"rom.optimized"`` and ``"rom.reduced"`` this writes
    ``<prefix>.<gp>.True_Theta.csv`` from ``linear_transformation`` and one
    ``<prefix>.<gp>.<param>`` file per Sobol/kernel parameter. Each merged
    file holds the rows of all ``FOLDS`` folds, tagged by a leading ``fold``
    column, followed by their element-wise average tagged ``fold == 'mean'``.
    ``test_stats.csv`` is not collected for ``"rom.optimized"``.

    Args:
        M: Forwarded to ``store_dir``.
        N: Forwarded to ``store_dir``.
        function_name: Forwarded to ``store_dir``.
        random: Forwarded to ``store_dir``.
        noisy: Forwarded to ``store_dir``; also selects the file-name prefix.
    """
    noisy_str = NORMAL_CDF_DIR if noisy else NOISELESS_DIR
    source_store = store_dir(M, N, function_name, random, noisy)
    destination = (BASE_PATH / "results") / source_store.name
    destination.mkdir(mode=0o777, parents=True, exist_ok=True)
    for gp in ("ard", "rom.optimized", "rom.reduced"):
        for sobol in (True, False):
            if sobol:
                lin_trans = linear_transformation(source_store)
                # NOTE(review): constructing data.Frame(path, df) is presumably what
                # writes the CSV; the instance itself is not needed afterwards.
                data.Frame(
                    destination / "{0}.{1}.{2}".format(noisy_str, gp, "True_Theta.csv"),
                    DataFrame(lin_trans))
                # Transposed once here; used below for the Theta sign alignment.
                lin_trans = transpose(lin_trans)
                params = ("Theta.csv", "S.csv", "S1.csv")
            elif gp == "rom.optimized":
                params = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv")
            else:
                params = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv", "test_stats.csv")
            for param in params:
                per_fold = []
                avg = None
                for k in range(FOLDS):
                    fold_dir = (source_store / "fold.{0:d}".format(k)) / gp
                    if sobol:
                        source = fold_dir / "sobol"
                    elif param == "lengthscale.csv":
                        source = fold_dir / "kernel"
                    else:
                        source = fold_dir
                    result = data.Frame(
                        source / param, **model.base.Model.CSV_PARAMETERS).df.copy(deep=True)
                    if param == "Theta.csv":  # TODO: May not need this
                        # Multiply row i by the sign of the i-th diagonal entry of
                        # (Theta @ lin_trans), so that diagonal ends up non-negative.
                        signs = result.values @ lin_trans
                        signs = sign(diag(signs))
                        signs.shape = (signs.shape[0], 1)
                        result *= signs
                    result.insert(0, "fold", full(result.shape[0], k), True)
                    per_fold.append(result)
                    if avg is None:
                        avg = result / FOLDS
                    else:
                        avg += result / FOLDS
                # Replace the whole column instead of writing through .loc: putting a
                # string into the (float) averaged fold column in place is an
                # incompatible-dtype set that newer pandas deprecates.
                avg["fold"] = "mean"
                results = concat(per_fold + [avg], axis=0, ignore_index=True)
                data.Frame(
                    destination / "{0}.{1}.{2}".format(noisy_str, gp, param), results)
def _collect_result(test_function: str, N: int, noise_std: float, random: bool, gp: str, M: int = 5):
    """Merge the per-fold kernel CSV outputs of one GP into one file per parameter.

    The store is located via ``store_path(test_function, N, noise_std, random, M)``
    and output goes to ``RESULTS_PATH / <store name>`` (created if missing).
    Only the non-Sobol parameters are collected at present, because the
    ``sobol`` loop runs over ``(False, )``; the Sobol branch is kept so it can
    be re-enabled by extending that tuple.

    Each ``<gp>.<param>`` file holds the rows of all ``K`` folds, tagged by a
    leading ``fold`` column, followed by their element-wise average tagged
    ``fold == 'mean'``. ``test_stats.csv`` is not collected for
    ``"rom.optimized"``.

    Args:
        test_function: Forwarded to ``store_path``.
        N: Forwarded to ``store_path``.
        noise_std: Forwarded to ``store_path``.
        random: Forwarded to ``store_path``.
        gp: Name of the GP sub-directory inside each fold directory.
        M: Forwarded to ``store_path``.
    """
    source_store = store_path(test_function, N, noise_std, random, M)
    destination = RESULTS_PATH / source_store.name
    destination.mkdir(mode=0o777, parents=True, exist_ok=True)
    for sobol in (False, ):
        if sobol:
            lin_trans = linear_transformation(source_store)
            # NOTE(review): constructing data.Frame(path, df) is presumably what
            # writes the CSV; the instance itself is not needed afterwards.
            data.Frame(
                destination / "{0}.{1}".format(gp, "True_Theta.csv"), DataFrame(lin_trans))
            lin_trans = transpose(lin_trans)
            params = ("Theta.csv", "S.csv", "S1.csv")
        elif gp == "rom.optimized":
            params = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv")
        else:
            params = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv", "test_stats.csv")
        for param in params:
            per_fold = []
            avg = None
            for k in range(K):
                fold_dir = (source_store / "fold.{0:d}".format(k)) / gp
                if sobol:
                    source = fold_dir / "sobol"
                elif param == "lengthscale.csv":
                    source = fold_dir / "kernel"
                else:
                    source = fold_dir
                result = data.Frame(
                    source / param, **model.base.Model.CSV_PARAMETERS).df.copy(deep=True)
                # NOTE: sign alignment of Theta.csv rows against the transposed
                # lin_trans used to happen here and is currently disabled
                # (TODO: may not be needed).
                result.insert(0, "fold", full(result.shape[0], k), True)
                per_fold.append(result)
                if avg is None:
                    avg = result / K
                else:
                    avg += result / K
            # Replace the whole column instead of writing through .loc: putting a
            # string into the (float) averaged fold column in place is an
            # incompatible-dtype set that newer pandas deprecates.
            avg["fold"] = "mean"
            results = concat(per_fold + [avg], axis=0, ignore_index=True)
            data.Frame(destination / "{0}.{1}".format(gp, param), results)
def summarise_results(test_functions: Tuple[str, ...], Ns: Tuple[int, ...], noise_stds: Tuple[float, ...],
                      randoms: Tuple[bool, ...], gps: Tuple[str, ...], Ms: Tuple[int, ...] = (5, )):
    """Stack the fold-mean rows of every (N, noise_std) store into summary CSVs.

    For each ``M``, test function and ``random`` flag, a directory
    ``RESULTS_PATH / "<test_function>.<M>.<_random_str(random)>"`` is created.
    For each GP and parameter file, the ``fold == 'mean'`` rows (all rows for
    ``True_Theta.csv``) of ``<store>/results/<gp>.<param>`` are gathered over
    every ``N`` and ``noise_std``, prefixed with ``Noise`` and ``N`` columns,
    and written twice: as ``<gp>.<param>`` and, with ``%.4f`` float
    formatting, as ``formatted.<gp>.<param>``.

    The ``Noise`` column holds ``noise_std`` divided by the contents of the
    store's ``results/std.csv``; the ``N`` column holds ``int(N/2)``.
    """
    kernel_files = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv", "test_stats.csv")
    for M in Ms:
        for test_function in test_functions:
            # The value is not used below; the call is kept exactly as before.
            Mu = choose_Mu(test_function)
            for random in randoms:
                destination = RESULTS_PATH / "{0}.{1:d}.{2}".format(test_function, M, _random_str(random))
                destination.mkdir(mode=0o777, parents=True, exist_ok=True)
                for gp in gps:
                    # Theta diagnostics are only summarised for the randomly
                    # rotated "rom.optimized.reduced" GP.
                    if random and gp == "rom.optimized.reduced":
                        sobol_files = ("S.csv", "S1.csv", "Theta.csv", "True_Theta.csv", "Theta_Analyzed.csv")
                    else:
                        sobol_files = ("S.csv", "S1.csv", "Theta.csv")
                    # Sobol files first, then kernel files, as in the sobol=True/False order.
                    for params in (sobol_files, kernel_files):
                        for param in params:
                            nothing_collected = True
                            for N in Ns:
                                for noise_std in noise_stds:
                                    results_path = store_path(test_function, N, noise_std, random, M) / "results"
                                    std = loadtxt(results_path / 'std.csv')
                                    source = results_path / "{0}.{1}".format(gp, param)
                                    loaded = data.Frame(source, **model.base.Model.CSV_PARAMETERS).df.copy(deep=True)
                                    if param == "True_Theta.csv":
                                        rows = loaded.copy(deep=True)
                                    else:
                                        is_mean = loaded['fold'] == 'mean'
                                        rows = loaded.loc[is_mean].drop('fold', axis=1).copy(deep=True)
                                    row_count = rows.shape[0]
                                    rows.insert(0, "N", full(row_count, int(N/2), dtype=int), True)
                                    rows.insert(0, "Noise", full(row_count, noise_std/std), True)
                                    if nothing_collected:
                                        results = rows
                                        nothing_collected = False
                                    else:
                                        results = concat([results, rows], axis=0, ignore_index=True, sort=False)
                            plain_name = "{0}.{1}".format(gp, param)
                            results.to_csv(destination / plain_name, index=False)
                            formatted_name = "formatted.{0}.{1}".format(gp, param)
                            results.to_csv(destination / formatted_name, float_format='%.4f', index=False)
def synopsise(test_functions: Tuple[str, ...], randoms: Tuple[bool, ...], gps: Tuple[str, ...]):
    """Combine the per-function summary CSVs into one synopsis file per parameter.

    For each kernel parameter file, reads
    ``SUMMARY_PATH / "<test_function>.<_random_str(random)>" / "<gp>.<param>"``
    for every test function, ``random`` flag and GP, prefixes the rows with
    ``Test Function``, ``Random Rotation`` and ``GP`` columns, stacks them and
    writes the result to ``SUMMARY_PATH / "synopsis" / <param>``.
    """
    destination = SUMMARY_PATH / "synopsis"
    destination.mkdir(mode=0o777, parents=True, exist_ok=True)
    for param in ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv", "test_stats.csv"):
        nothing_collected = True
        for test_function in test_functions:
            for random in randoms:
                source_path = SUMMARY_PATH / (test_function + '.' + _random_str(random))
                for gp in gps:
                    summary_file = source_path / "{0}.{1}".format(gp, param)
                    rows = data.Frame(summary_file, header=0, index_col=False).df.copy(deep=True)
                    # Each insert goes to position 0, so the final column order is
                    # "Test Function", "Random Rotation", "GP", then the data.
                    for label, value in (("GP", gp),
                                         ("Random Rotation", random),
                                         ("Test Function", test_function)):
                        rows.insert(0, label, full(rows.shape[0], value), True)
                    if nothing_collected:
                        results = rows
                        nothing_collected = False
                    else:
                        results = concat([results, rows], axis=0, ignore_index=True, sort=False)
        results.to_csv(destination / "{0}".format(param), index=False)
def _analyze_theta(M: int, N: int, function_name: str, noisy: bool) -> DataFrame:
    """Compare the Theta matrices of the ``.rom`` and ``.random`` results fold by fold.

    Reads ``<prefix>.rom.optimized.Theta.csv`` from both
    ``BASE_PATH / "results" / "<function_name>.<N>.<M>.rom"`` and
    ``... ".random"``, plus ``<prefix>.rom.optimized.True_Theta.csv`` from the
    ``.random`` directory, where ``<prefix>`` is ``NORMAL_CDF_DIR`` when
    ``noisy`` is true and ``NOISELESS_DIR`` otherwise.

    For each of the ``FOLDS`` folds it forms
    ``h = Theta_rom @ True_Theta @ Theta_random.T`` and passes the leading
    ``Mu x Mu`` block and the trailing block of ``h`` (``Mu`` from
    ``choose_Mu(function_name)``) to ``_singular_values``. The per-fold rows
    and their average over folds are written to the ``.random`` directory as
    ``<prefix>.rom.optimized.Theta_Analyzed.csv`` and, with ``%.4f`` float
    formatting, ``<prefix>.rom.optimized.Theta_Analyzed.formatted.csv``.

    Returns:
        The fold-averaged rows, with the ``fold`` column set to ``'mean'``.
    """
    Mu = choose_Mu(function_name)
    noisy_str = NORMAL_CDF_DIR if noisy else NOISELESS_DIR
    random_path = ((BASE_PATH / "results") /
                   (function_name + ".{0:d}.{1:d}.random").format(N, M))
    theta_true = data.Frame(
        random_path / (noisy_str + ".rom.optimized.True_Theta.csv"),
        **model.base.Model.CSV_PARAMETERS).df.values
    theta_csv = (noisy_str + ".rom.optimized.Theta.csv")
    rom_path = ((BASE_PATH / "results") /
                (function_name + ".{0:d}.{1:d}.rom").format(N, M))
    theta_rom_df = data.Frame(rom_path / theta_csv, **model.base.Model.CSV_PARAMETERS).df
    theta_random_df = data.Frame(random_path / theta_csv, **model.base.Model.CSV_PARAMETERS).df
    per_fold = []
    # Named mean_df so the local does not shadow a module-level ``mean`` function.
    mean_df = None
    for k in range(FOLDS):
        # The fold column is compared against str(k); the first column of the
        # selected rows (the fold tag) is dropped before converting to float.
        theta_rom = theta_rom_df.loc[theta_rom_df['fold'] == str(k)].values[:, 1:].copy().astype(float)
        theta_random = theta_random_df.loc[
            theta_random_df['fold'] == str(k)].values[:, 1:].transpose().copy().astype(float)
        h = theta_rom @ theta_true @ theta_random
        resultA = _singular_values(h[:Mu, :Mu])
        resultI = _singular_values(h[Mu:, Mu:])
        result = concatenate((resultA, resultI), axis=1)
        result_df = DataFrame(result)
        result_df.insert(0, "fold", full(result.shape[0], k, dtype=int), True)
        per_fold.append(result_df)
        if mean_df is None:
            mean_df = result_df / FOLDS
        else:
            mean_df += result_df / FOLDS
    # Replace the whole column instead of writing through .loc: putting a string
    # into the (float) averaged fold column in place is an incompatible-dtype
    # set that newer pandas deprecates.
    mean_df["fold"] = "mean"
    results = concat(per_fold + [mean_df], axis=0, ignore_index=True)
    results.to_csv(
        random_path / "{0}.{1}.Theta_Analyzed.csv".format(noisy_str, "rom.optimized"))
    results.to_csv(
        random_path / "{0}.{1}.Theta_Analyzed.formatted.csv".format(noisy_str, "rom.optimized"),
        float_format='%.4f')
    return mean_df
def _analyze_theta(test_function: str, N: int, noise_std: float, M: int) -> DataFrame:
    """Compare the Theta matrices of the ``random=False`` and ``random=True`` stores.

    For both values of the ``random`` flag, reads ``rom.optimized.Theta.csv``
    from ``store_path(test_function, N, noise_std, flag, M) / "results"``;
    ``rom.optimized.True_Theta.csv`` is read from the ``random=True`` results.

    For each of the ``K`` folds it forms
    ``h = Theta[False] @ True_Theta @ Theta[True].T`` and passes the leading
    ``Mu x Mu`` block and the trailing block of ``h`` (``Mu`` from
    ``choose_Mu(test_function)``) to ``_singular_values``. The per-fold rows
    and their average over folds are written to
    ``rom.optimized.reduced.Theta_Analyzed.csv`` in the ``random=True``
    results directory.

    Returns:
        The fold-averaged rows, with the ``fold`` column set to ``'mean'``.
    """
    Mu = choose_Mu(test_function)
    flags = (True, False)
    results_dir = {flag: (store_path(test_function, N, noise_std, flag, M) / "results")
                   for flag in flags}
    theta_df = {flag: data.Frame(results_dir[flag] / "rom.optimized.Theta.csv",
                                 **model.base.Model.CSV_PARAMETERS).df
                for flag in flags}
    theta_true = data.Frame(results_dir[True] / "rom.optimized.True_Theta.csv",
                            **model.base.Model.CSV_PARAMETERS).df.values
    per_fold = []
    # Named mean_df so the local does not shadow a module-level ``mean`` function.
    mean_df = None
    for k in range(K):
        # The fold column is compared against str(k); the first column of the
        # selected rows (the fold tag) is dropped before converting to float.
        theta = {flag: theta_df[flag].loc[theta_df[flag]['fold'] == str(k)]
                 .values[:, 1:].copy().astype(float)
                 for flag in flags}
        h = theta[False] @ theta_true @ theta[True].transpose()
        resultA = _singular_values(h[:Mu, :Mu])
        resultI = _singular_values(h[Mu:, Mu:])
        result = concatenate((resultA, resultI), axis=1)
        result_df = DataFrame(result)
        result_df.insert(0, "fold", full(result.shape[0], k, dtype=int), True)
        per_fold.append(result_df)
        if mean_df is None:
            mean_df = result_df / K
        else:
            mean_df += result_df / K
    # Replace the whole column instead of writing through .loc: putting a string
    # into the (float) averaged fold column in place is an incompatible-dtype
    # set that newer pandas deprecates.
    mean_df["fold"] = "mean"
    results = concat(per_fold + [mean_df], axis=0, ignore_index=True, sort=False)
    results.to_csv(results_dir[True] / "rom.optimized.reduced.Theta_Analyzed.csv")
    return mean_df
def _run_test(M, N, function_name, random, noisy):
    """Re-optimise a ``rom.reduced`` GP in every fold and record its test statistics.

    The store is located via ``store_dir(M, N, function_name, random, noisy)``.
    For each of the ``FOLDS`` folds, the fold's ``rom.reduced`` directory is
    replaced by a fresh copy of ``rom.optimized``; a ``model.gpy_.GP`` named
    ``"rom.reduced"`` is then built on the fold, optimised with the default
    optimizer options, and tested. The kernel lengthscale starts as a
    ``(1, Mu)`` array filled with 0.2, with ``Mu = choose_Mu(function_name)``.
    """
    # Not used below; retained so the lookup happens exactly as before.
    noisy_str = NORMAL_CDF_DIR if noisy else NOISELESS_DIR
    source_store = store_dir(M, N, function_name, random, noisy)
    Mu = choose_Mu(function_name)
    initial_lengthscale = full((1, Mu), 0.2, dtype=float)
    kernel_parameters = model.gpy_.Kernel.ExponentialQuadratic.Parameters(
        lengthscale=initial_lengthscale)
    parameters = model.gpy_.GP.DEFAULT_PARAMETERS._replace(
        kernel=kernel_parameters, e_floor=1E-5, e=1E-10)
    for k in range(FOLDS):
        fold = data.Fold(source_store, k, Mu)
        reduced_dir = fold.dir / "rom.reduced"
        # Start from a clean copy of the optimised model directory.
        if reduced_dir.exists():
            shutil.rmtree(reduced_dir)
        shutil.copytree(src=fold.dir / "rom.optimized", dst=reduced_dir)
        gp = model.gpy_.GP(fold, "rom.reduced", parameters)
        gp.optimize(model.gpy_.GP.DEFAULT_OPTIMIZER_OPTIONS)
        stats_path = gp.dir / "test_stats.csv"
        test_df = gp.test().df.copy()
        # NOTE(review): the _test_stats defined in this file takes a directory path
        # as its second argument, whereas a DataFrame copy is passed here —
        # confirm which signature is intended.
        data.Frame(stats_path, _test_stats(k, test_df))
def summarise_results(Ms: Tuple[int], Ns: Tuple[int], function_names: Tuple[str]):
    """Stack the fold-mean rows of every (N, noisy) results directory into summary CSVs.

    For each ``M``, function name and ``random`` flag, a directory
    ``BASE_PATH / "results" / "<function_name>.random.<M>"`` (or ``.rom.<M>``)
    is created. For each of the GPs ``"ard"``, ``"rom.optimized"`` and
    ``"rom.reduced"`` and each parameter file, the ``fold == 'mean'`` rows (all
    rows for ``True_Theta.csv``) are gathered over every ``N`` and both noise
    settings, prefixed with ``Noise`` and ``N`` columns, and written as
    ``<gp>.<param>`` and, with ``%.4f`` float formatting, as
    ``<gp>.formatted.<param>``.

    ``Noise`` is 0.025 for the noisy results and 0 otherwise; ``N`` holds
    ``int(N / 2)``. Sobol files are skipped for ``"rom.reduced"``,
    ``test_stats.csv`` is skipped for ``"rom.optimized"``, and
    ``Theta_Analyzed.csv`` is only read when ``random`` is true and the GP is
    not ``"ard"``.
    """
    kernel_short = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv")
    kernel_full = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv", "test_stats.csv")
    sobol_files = ("S.csv", "S1.csv", "Theta.csv", "True_Theta.csv", "Theta_Analyzed.csv")
    for M in Ms:
        for function_name in function_names:
            # The value is not used below; the call is kept exactly as before.
            Mu = choose_Mu(function_name)
            for random in (True, False):
                suffix = ".random.{0:d}" if random else ".rom.{0:d}"
                destination = (BASE_PATH / "results") / (function_name + suffix.format(M))
                destination.mkdir(mode=0o777, parents=True, exist_ok=True)
                for gp in ("ard", "rom.optimized", "rom.reduced"):
                    for sobol in (True, False):
                        if not sobol:
                            params = kernel_short if gp == "rom.optimized" else kernel_full
                        elif gp == "rom.reduced":
                            params = ()
                        else:
                            params = sobol_files
                        for param in params:
                            if param == "Theta_Analyzed.csv" and (gp == "ard" or not random):
                                continue
                            nothing_collected = True
                            for N in Ns:
                                for noisy in (False, True):
                                    if noisy:
                                        noisy_str = NORMAL_CDF_DIR
                                        noise = 0.025
                                    else:
                                        noisy_str = NOISELESS_DIR
                                        noise = 0
                                    results_dir = ((BASE_PATH / "results") /
                                                   store_dir(M, N, function_name, random, noisy).name)
                                    source = results_dir / "{0}.{1}.{2}".format(noisy_str, gp, param)
                                    loaded = data.Frame(
                                        source, **model.base.Model.CSV_PARAMETERS).df.copy(deep=True)
                                    if param == "True_Theta.csv":
                                        rows = loaded.copy(deep=True)
                                    else:
                                        is_mean = loaded['fold'] == 'mean'
                                        rows = loaded.loc[is_mean].drop('fold', axis=1).copy(deep=True)
                                    rows.insert(0, "N", full(rows.shape[0], int(N / 2), dtype=int), True)
                                    rows.insert(0, "Noise", full(rows.shape[0], noise), True)
                                    if nothing_collected:
                                        results = rows
                                        nothing_collected = False
                                    else:
                                        results = concat([results, rows], axis=0, ignore_index=True)
                            plain_name = "{0}.{1}".format(gp, param)
                            results.to_csv(destination / plain_name, index=False)
                            formatted_name = "{0}.formatted.{1}".format(gp, param)
                            results.to_csv(destination / formatted_name, float_format='%.4f', index=False)
def summarise_results(test_functions: Tuple[str, ...], Ns: Tuple[int, ...], noise_stds: Tuple[float, ...],
                      randoms: Tuple[bool, ...], gps: Tuple[str, ...], Ms: Tuple[int, ...] = (5, )):
    """Stack the fold-mean rows of every (M, N, noise_std) results directory into summary CSVs.

    For each test function and ``random`` flag, a directory
    ``SUMMARY_PATH / "<test_function>.<_random_str(random)>"`` is created. For
    each GP and parameter file, the ``fold == 'mean'`` rows (all rows for
    ``True_Theta.csv``) of ``RESULTS_PATH / <store name> / "<gp>.<param>"``
    are gathered over every ``M``, ``N`` and ``noise_std``, prefixed with
    ``M``, ``Noise`` and ``N`` columns, and written as ``<gp>.<param>``.

    Only the non-Sobol parameters are summarised at present, because the
    ``sobol`` loop runs over ``(False, )``. ``test_stats.csv`` is skipped for
    ``"rom.optimized"``. The ``N`` column holds ``int(N / 2)``.
    """
    kernel_short = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv")
    kernel_full = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv", "test_stats.csv")
    sobol_files = ("S.csv", "S1.csv", "Theta.csv", "True_Theta.csv", "Theta_Analyzed.csv")
    for test_function in test_functions:
        for random in randoms:
            destination = SUMMARY_PATH / (test_function + '.' + _random_str(random))
            destination.mkdir(mode=0o777, parents=True, exist_ok=True)
            for gp in gps:
                for sobol in (False, ):
                    if not sobol:
                        params = kernel_short if gp == "rom.optimized" else kernel_full
                    elif gp == "rom.reduced":
                        params = ()
                    else:
                        params = sobol_files
                    for param in params:
                        if param == "Theta_Analyzed.csv" and (gp == "ard" or not random):
                            continue
                        nothing_collected = True
                        for M in Ms:
                            for N in Ns:
                                for noise_std in noise_stds:
                                    source_store = store_path(test_function, N, noise_std, random, M)
                                    source = (RESULTS_PATH / source_store.name) / "{0}.{1}".format(gp, param)
                                    loaded = data.Frame(
                                        source, **model.base.Model.CSV_PARAMETERS).df.copy(deep=True)
                                    if param == "True_Theta.csv":
                                        rows = loaded.copy(deep=True)
                                    else:
                                        is_mean = loaded['fold'] == 'mean'
                                        rows = loaded.loc[is_mean].drop('fold', axis=1).copy(deep=True)
                                    # Each insert goes to position 0, so the final leading
                                    # columns are "M", "Noise", "N".
                                    rows.insert(0, "N", full(rows.shape[0], int(N / 2), dtype=int), True)
                                    rows.insert(0, "Noise", full(rows.shape[0], noise_std), True)
                                    rows.insert(0, "M", full(rows.shape[0], M), True)
                                    if nothing_collected:
                                        results = rows
                                        nothing_collected = False
                                    else:
                                        results = concat([results, rows], axis=0, ignore_index=True, sort=False)
                        results.to_csv(destination / "{0}.{1}".format(gp, param), index=False)