def test_linear_rescale_inverse(args):
    X, lb0, ub0, lb1, ub1, enforce_bounds = args
    enforce_bounds = enforce_bounds >= 0

    # Use sorted because hypothesis doesn't like using assume too often
    lb0, ub0 = sorted([lb0, ub0])
    lb1, ub1 = sorted([lb1, ub1])
    assume(lb0 < ub0)
    assume(lb1 < ub1)

    # Can't expect numerics to work well in these extreme cases:
    assume((ub0 - lb0) < 1e3 * (ub1 - lb1))

    if enforce_bounds:
        X = np.clip(X, lb0, ub0)

    X_ = np_util.linear_rescale(X, lb0, ub0, lb1, ub1, enforce_bounds=enforce_bounds)
    X_ = np_util.linear_rescale(X_, lb1, ub1, lb0, ub0, enforce_bounds=enforce_bounds)
    assert close_enough(X_, X)
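# For reference, a minimal sketch of the affine map these linear_rescale tests
# exercise. This is an illustrative assumption, not the real np_util.linear_rescale,
# which may differ in how it validates inputs and handles degenerate ranges.
import numpy as np


def _linear_rescale_sketch(X, lb0, ub0, lb1, ub1, enforce_bounds=True):
    """Affinely map X from [lb0, ub0] onto [lb1, ub1]."""
    Y = lb1 + (np.asarray(X, dtype=float) - lb0) * (ub1 - lb1) / (ub0 - lb0)
    # Clipping keeps the output inside [lb1, ub1] even under round-off
    return np.clip(Y, lb1, ub1) if enforce_bounds else Y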
def test_linear_rescale_bounds(args):
    lb0, ub0, lb1, ub1 = args

    # Use sorted because hypothesis doesn't like using assume too often
    lb0, ub0 = sorted([lb0, ub0])
    lb1, ub1 = sorted([lb1, ub1])
    assume(lb0 < ub0)
    assume(lb1 <= ub1)

    lb1_ = np_util.linear_rescale(lb0, lb0, ub0, lb1, ub1)
    assert close_enough(lb1, lb1_)

    ub1_ = np_util.linear_rescale(ub0, lb0, ub0, lb1, ub1)
    assert close_enough(ub1, ub1_)
def test_real_range_unwarp_warp(warp, args):
    x_w, range_ = args

    if warp == "log":
        range_ = range_[range_ > 0]
    if warp == "logit":
        range_ = range_[(0 < range_) & (range_ < 1)]
    range_ = np.sort(range_)
    assume(len(range_) == 2 and range_[0] < range_[1])

    range_warped = sp.WARP_DICT[warp](range_)
    x_w = np.clip(x_w, range_warped[0], range_warped[1])

    S = sp.Real(warp=warp, range_=range_)

    # Test bounds
    lower, upper = S.get_bounds().T
    x_w = linear_rescale(x_w, lb0=-1000, ub0=1000, lb1=lower, ub1=upper)

    x = S.unwarp(x_w)
    assert x_w.shape == x.shape + (1,)
    assert x.dtype == range_.dtype
    assert x.dtype == S.dtype

    x2 = S.validate(x)
    assert close_enough(x, x2)

    x_w2 = S.warp(x)
    assert x_w2.shape == x_w.shape

    x_w3 = S.validate_warped(x_w2)
    assert close_enough(x_w2, x_w3)

    assert close_enough(x_w, x_w2)
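# The warp/unwarp round trip above depends on each warp being a monotone
# bijection. A hedged sketch of the kind of pairs a WARP_DICT-style mapping
# holds, with a quick self-check; the actual bayesmark definitions may differ:
import numpy as np
from scipy.special import expit, logit  # expit is the inverse of logit

_warp_pairs_sketch = {
    "linear": (lambda x: x, lambda y: y),
    "log": (np.log, np.exp),
    "logit": (logit, expit),
}
_x = np.array([0.25, 0.5, 0.75])
_fwd, _inv = _warp_pairs_sketch["logit"]
assert np.allclose(_inv(_fwd(_x)), _x)  # round trip recovers the input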
def test_linear_rescale_bound_modes(args):
    X, lb0, ub0, lb1, ub1 = args

    # Use sorted because hypothesis doesn't like using assume too often
    lb0, ub0 = sorted([lb0, ub0])
    lb1, ub1 = sorted([lb1, ub1])
    assume(lb0 < ub0)
    assume(lb1 <= ub1)

    X = np.clip(X, lb0, ub0)

    # For in-bounds X, both bound modes must agree
    Y1 = np_util.linear_rescale(X, lb0, ub0, lb1, ub1, enforce_bounds=False)
    Y2 = np_util.linear_rescale(X, lb0, ub0, lb1, ub1, enforce_bounds=True)
    assert close_enough(Y1, Y2)
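# The two bound modes only diverge when X lies outside [lb0, ub0]: without
# enforcement the affine map extrapolates, with enforcement the result stays
# in [lb1, ub1]. Demonstrated with the hypothetical sketch above (the real
# implementation may instead reject out-of-range input):
assert _linear_rescale_sketch(2.0, 0.0, 1.0, 0.0, 10.0, enforce_bounds=False) == 20.0
assert _linear_rescale_sketch(2.0, 0.0, 1.0, 0.0, 10.0, enforce_bounds=True) == 10.0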
def test_random_search_suggest_diff(api_args, n_suggest, seed):
    # Hard to know how many iters needed for arbitrary space that we need to
    # run so that we don't get dupes by chance. So, for now, let's just stick
    # with this simple space.
    dim = {"space": "linear", "type": "real", "range": [1.0, 5.0]}
    # Use at least 10 n_suggest to make sure we don't get the same answer by chance

    X_w, y = api_args
    D = X_w.shape[1]
    param_names = ["x%d" % ii for ii in range(D)]
    meta = dict(zip(param_names, [dim] * D))

    # Get the unwarped X
    S = sp.JointSpace(meta)
    lower, upper = S.get_bounds().T
    X_w = linear_rescale(X_w, lb0=0.0, ub0=1.0, lb1=lower, ub1=upper)
    X = S.unwarp(X_w)
    S.validate(X)

    seed = seed // 2  # Keep in bounds even after adding 7
    x_guess = suggest_dict(X, y, meta, n_suggest, random=np.random.RandomState(seed))
    # Use a different seed to intentionally get a different result
    x_guess2 = suggest_dict(X, y, meta, n_suggest, random=np.random.RandomState(seed + 7))

    # Check types too
    assert len(x_guess) == n_suggest
    assert len(x_guess2) == n_suggest
    assert not np.all(x_guess == x_guess2)

    # Make sure validated
    S.validate(x_guess)
    S.validate(x_guess2)

    # Test sanity of output
    D, = lower.shape
    x_guess_w = S.warp(x_guess)
    assert type(x_guess_w) == np.ndarray
    assert x_guess_w.dtype.kind == "f"
    assert x_guess_w.shape == (n_suggest, D)
    assert np.all(lower <= x_guess_w)
    assert np.all(x_guess_w <= upper)

    x_guess_w = S.warp(x_guess2)
    assert type(x_guess_w) == np.ndarray
    assert x_guess_w.dtype.kind == "f"
    assert x_guess_w.shape == (n_suggest, D)
    assert np.all(lower <= x_guess_w)
    assert np.all(x_guess_w <= upper)
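# Minimal usage sketch of suggest_dict on the same toy space as the test,
# assuming random search ignores the (here empty) observation history; the
# exact handling of empty X/y is an assumption:
_meta = {"x0": {"space": "linear", "type": "real", "range": [1.0, 5.0]}}
_guesses = suggest_dict([], [], _meta, 2, random=np.random.RandomState(0))
# Each guess is a dict mapping parameter name to a value inside its range
assert all(1.0 <= gg["x0"] <= 5.0 for gg in _guesses)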
def test_linear_rescale_inner(args):
    X, lb0, ub0, lb1, ub1 = args

    # Use sorted because hypothesis doesn't like using assume too often
    lb0, ub0 = sorted([lb0, ub0])
    lb1, ub1 = sorted([lb1, ub1])
    assume(lb0 < ub0)
    assume(lb1 <= ub1)

    X = np.clip(X, lb0, ub0)

    X = np_util.linear_rescale(X, lb0, ub0, lb1, ub1)
    assert np.all(X <= ub1)
    assert np.all(lb1 <= X)
def postwarp(self, xxw):
    """Extra work needed to undo the Gaussian space representation."""
    xx = {}
    for arg_name, vv in xxw.items():
        assert np.isscalar(vv)
        space = self.space[arg_name]
        if space is not None:
            # Undo the std Gaussian: cdf maps back to uniform on [0, 1]
            vv = norm.cdf(vv)
            # Now rescale from [0, 1] to the warped bounds
            (lb, ub), = space.get_bounds()
            vv = linear_rescale(vv, 0, 1, lb, ub)
            # Unwarp so the variable is back in its original (apriori uniform in [a, b]) space
            vv = space.unwarp([vv])
        assert np.isscalar(vv)
        xx[arg_name] = vv
    return xx
def log_mean_score_json(evals, iters):
    assert evals.shape == (len(OBJECTIVE_NAMES),)
    assert not np.any(np.isnan(evals))

    log_msg = {
        cc.TEST_CASE: test_case_str,
        cc.METHOD: optimizer_str,
        cc.TRIAL: args[CmdArgs.uuid],
        cc.ITER: iters,
    }
    for idx, obj in enumerate(OBJECTIVE_NAMES):
        assert OBJECTIVE_NAMES[idx] == obj

        # Extract relevant rescaling info
        slice_ = {cc.TEST_CASE: test_case_str, OBJECTIVE: obj}
        best_opt = baseline_ds[cc.PERF_BEST].sel(slice_, drop=True).values.item()
        base_clip_val = baseline_ds[cc.PERF_CLIP].sel(slice_, drop=True).values.item()

        # Perform the same rescaling as found in experiment_analysis.compute_aggregates()
        score = linear_rescale(evals[idx], best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False)
        # Also, clip the score from below at -1 to limit max influence of a single run on the final average
        score = np.clip(score, -1.0, 1.0)
        score = score.item()  # Make easiest for logging in JSON
        assert isinstance(score, float)

        # Note: This is not the raw score but the rescaled one!
        log_msg[obj] = score
    log_msg = json.dumps(log_msg)
    print(log_msg, flush=True)
    # One second safety delay to protect against subprocess stdout getting lost
    sleep(1)
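# Worked example of the rescaling above: with best_opt = 0.0 and
# base_clip_val = 2.0, a raw eval of 1.0 scores (1.0 - 0.0) / (2.0 - 0.0) = 0.5,
# so 0 means optimal and 1 means at the clipping baseline; the final clip to
# [-1, 1] bounds the influence of any single run on the mean score.
assert np.isclose(
    np.clip(linear_rescale(1.0, 0.0, 2.0, 0.0, 1.0, enforce_bounds=False), -1.0, 1.0), 0.5
)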
def prewarp(self, xx):
    """Extra work needed to get variables into the Gaussian space representation."""
    xxw = {}
    for arg_name, vv in xx.items():
        assert np.isscalar(vv)
        space = self.space[arg_name]
        if space is not None:
            # Warp so we think it is apriori uniform in [a, b]
            vv = space.warp(vv)
            assert vv.size == 1
            # Now make uniform on [0, 1], also unpack warped to scalar
            (lb, ub), = space.get_bounds()
            vv = linear_rescale(vv.item(), lb, ub, 0, 1)
            # Now make std Gaussian apriori
            vv = norm.ppf(vv)
        assert np.isscalar(vv)
        xxw[arg_name] = vv
    return xxw
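# The prewarp/postwarp pair relies on norm.cdf and norm.ppf being inverses,
# i.e., uniform on [0, 1] <-> standard Gaussian. A quick self-contained check
# of that round trip:
import numpy as np
from scipy.stats import norm

_u = np.linspace(0.01, 0.99, 5)
assert np.allclose(norm.cdf(norm.ppf(_u)), _u)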
def compute_aggregates(perf_da, baseline_ds):
    """Aggregate function evaluations in the experiments to get performance summaries of each method.

    Parameters
    ----------
    perf_da : :class:`xarray:xarray.DataArray`
        Aggregate experimental results with each function evaluation in the experiments. `perf_da` has dimensions
        ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` and is assumed to have no nan values.
    baseline_ds : :class:`xarray:xarray.Dataset`
        Dataset with baseline performance. It has variables ``(PERF_MED, PERF_MEAN, PERF_CLIP, PERF_BEST)`` with
        dimensions ``(ITER, TEST_CASE)``, ``(ITER, TEST_CASE)``, ``(TEST_CASE,)``, and ``(TEST_CASE,)``,
        respectively. `PERF_MED` is a baseline of performance based on random search when using medians to summarize
        performance. Likewise, `PERF_MEAN` is for means. `PERF_CLIP` is an upper bound to clip poor performance when
        using the mean. `PERF_BEST` is an estimate of the global minimum.

    Returns
    -------
    agg_result : :class:`xarray:xarray.Dataset`
        Dataset with summary of performance for each method and test case combination. Contains variables:
        ``(PERF_MED, LB_MED, UB_MED, NORMED_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN)``
        each with dimensions ``(ITER, METHOD, TEST_CASE)``. `PERF_MED` is a median summary of performance with
        `LB_MED` and `UB_MED` as error bars. `NORMED_MED` is a rescaled `PERF_MED` so we expect the optimal
        performance is 0, and random search gives 1 at all `ITER`. Likewise, `PERF_MEAN`, `LB_MEAN`, `UB_MEAN`,
        `NORMED_MEAN` are for mean performance.
    summary : :class:`xarray:xarray.Dataset`
        Dataset with overall summary of performance of each method. Contains variables
        ``(PERF_MED, LB_MED, UB_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN, LB_NORMED_MEAN, UB_NORMED_MEAN)``
        each with dimensions ``(ITER, METHOD)``.
    """
    validate_agg_perf(perf_da, min_trial=1)

    assert isinstance(baseline_ds, xr.Dataset)
    assert tuple(baseline_ds[PERF_BEST].dims) == (TEST_CASE,)
    assert tuple(baseline_ds[PERF_CLIP].dims) == (TEST_CASE,)
    assert tuple(baseline_ds[PERF_MED].dims) == (ITER, TEST_CASE)
    assert tuple(baseline_ds[PERF_MEAN].dims) == (ITER, TEST_CASE)
    assert xru.coord_compat((perf_da, baseline_ds), (ITER, TEST_CASE))
    assert not any(np.any(np.isnan(baseline_ds[kk].values)) for kk in baseline_ds)

    # Now actually get the aggregate performance numbers per test case
    agg_result = xru.ds_like(
        perf_da,
        (PERF_MED, LB_MED, UB_MED, NORMED_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN),
        (ITER, METHOD, TEST_CASE),
    )
    baseline_mean_da = xru.only_dataarray(xru.ds_like(perf_da, ["ref"], (ITER, TEST_CASE)))
    # Using values here since just clearer to get raw items than xr object for func_name
    for func_name in perf_da.coords[TEST_CASE].values:
        rand_perf_med = baseline_ds[PERF_MED].sel({TEST_CASE: func_name}, drop=True).values
        rand_perf_mean = baseline_ds[PERF_MEAN].sel({TEST_CASE: func_name}, drop=True).values
        best_opt = baseline_ds[PERF_BEST].sel({TEST_CASE: func_name}, drop=True).values
        base_clip_val = baseline_ds[PERF_CLIP].sel({TEST_CASE: func_name}, drop=True).values

        assert np.all(np.diff(rand_perf_med) <= 0), "Baseline should be decreasing with iteration"
        assert np.all(np.diff(rand_perf_mean) <= 0), "Baseline should be decreasing with iteration"
        assert np.all(rand_perf_med > best_opt)
        assert np.all(rand_perf_mean > best_opt)
        assert np.all(rand_perf_mean <= base_clip_val)

        baseline_mean_da.loc[{TEST_CASE: func_name}] = linear_rescale(
            rand_perf_mean, best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False
        )

        for method_name in perf_da.coords[METHOD].values:
            # Take the minimum over all suggestions at a given iter + sanity check perf_da
            curr_da = perf_da.sel({METHOD: method_name, TEST_CASE: func_name}, drop=True).min(dim=SUGGEST)
            assert curr_da.dims == (ITER, TRIAL)

            # Want to evaluate the minimum so far during optimization
            perf_array = np.minimum.accumulate(curr_da.values, axis=0)

            # Compute median perf and CI on it
            med_perf, LB, UB = qt.quantile_and_CI(perf_array, EVAL_Q, alpha=ALPHA)
            assert med_perf.shape == rand_perf_med.shape
            agg_result[PERF_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = med_perf
            agg_result[LB_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = LB
            agg_result[UB_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = UB

            # Now store normed version, which is better for aggregation
            normed = linear_rescale(med_perf, best_opt, rand_perf_med, 0.0, 1.0, enforce_bounds=False)
            agg_result[NORMED_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = normed

            # Compute mean perf and CI on it
            perf_array = np.minimum(base_clip_val, perf_array)
            mean_perf = np.mean(perf_array, axis=1)
            assert mean_perf.shape == rand_perf_mean.shape
            EB = t_EB(perf_array, alpha=ALPHA, axis=1)
            assert EB.shape == rand_perf_mean.shape
            agg_result[PERF_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf
            agg_result[LB_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf - EB
            agg_result[UB_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf + EB

            # Now store normed version, which is better for aggregation
            normed = linear_rescale(mean_perf, best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False)
            agg_result[NORMED_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = normed
    assert not any(np.any(np.isnan(agg_result[kk].values)) for kk in agg_result)

    # Compute summary score over all test cases, summarize performance of each method
    summary = xru.ds_like(
        perf_da,
        (PERF_MED, LB_MED, UB_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN, LB_NORMED_MEAN, UB_NORMED_MEAN),
        (ITER, METHOD),
    )
    summary[PERF_MED], summary[LB_MED], summary[UB_MED] = xr.apply_ufunc(
        qt.quantile_and_CI,
        agg_result[NORMED_MED],
        input_core_dims=[[TEST_CASE]],
        kwargs={"q": EVAL_Q, "alpha": ALPHA},
        output_core_dims=[[], [], []],
    )

    summary[PERF_MEAN] = agg_result[NORMED_MEAN].mean(dim=TEST_CASE)
    EB = xr.apply_ufunc(t_EB, agg_result[NORMED_MEAN], input_core_dims=[[TEST_CASE]])
    summary[LB_MEAN] = summary[PERF_MEAN] - EB
    summary[UB_MEAN] = summary[PERF_MEAN] + EB

    normalizer = baseline_mean_da.mean(dim=TEST_CASE)
    summary[NORMED_MEAN] = summary[PERF_MEAN] / normalizer
    summary[LB_NORMED_MEAN] = summary[LB_MEAN] / normalizer
    summary[UB_NORMED_MEAN] = summary[UB_MEAN] / normalizer

    assert all(tuple(summary[kk].dims) == (ITER, METHOD) for kk in summary)
    return agg_result, summary
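# Worked example of the NORMED_MED rescaling used above: with best_opt = 1.0
# and a random-search median of 3.0, a method median of 2.0 is normed to
# (2.0 - 1.0) / (3.0 - 1.0) = 0.5, so 0 is optimal and 1 ties random search.
assert np.isclose(linear_rescale(2.0, 1.0, 3.0, 0.0, 1.0, enforce_bounds=False), 0.5)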