def test_ds_concat(ds, n):
    """Check that `xru.ds_concat` runs on a dataset that was split along some of its dimensions.

    The dims of every variable in `ds` are passed to `intersect_seq`; the sorted result gives the candidate
    dimensions. `ds` is then selected at every coordinate combination of the first `n` candidates and the pieces,
    keyed by their coordinate tuple, are handed to `xru.ds_concat`. Only successful completion is checked; the
    returned value is not inspected.

    Parameters
    ----------
    ds
        Dataset-like object supporting iteration over variable names, ``ds[name].dims``, ``ds.coords`` and
        ``ds.sel``.
    n : int
        Requested number of dimensions to split on. It is capped at one less than the number of candidate
        dimensions, and the case is rejected through `assume` when the capped value is not positive.
    """
    candidate_dims = sorted(intersect_seq([ds[name].dims for name in ds]))

    # Cap the request so that at least one candidate dimension is left unsplit.
    n = min([n, len(candidate_dims) - 1])
    assume(0 < n)

    split_dims = candidate_dims[:n]
    coord_values = [ds.coords[dim].values.tolist() for dim in split_dims]

    # One piece per coordinate combination, with the selected dimensions dropped from each piece.
    pieces = {combo: ds.sel(dict(zip(split_dims, combo)), drop=True) for combo in product(*coord_values)}

    xru.ds_concat(pieces, dims=split_dims)
def do_baseline(args):  # pragma: io
    """Alternate entry into the program without calling the actual main.

    Loads the derived evaluation results, runs `compute_baseline` on every variable of the loaded dataset,
    concatenates the per-variable results along ``cc.OBJECTIVE`` and saves the combined dataset under the
    ``cc.BASELINE`` key together with the meta data that was loaded.

    Parameters
    ----------
    args
        Mapping indexed by `CmdArgs` members. Only ``CmdArgs.db_root`` and ``CmdArgs.db`` are read here.
    """
    db_root = args[CmdArgs.db_root]
    db_name = args[CmdArgs.db]

    # Load in the eval data and sanity check
    perf_ds, meta = XRSerializer.load_derived(db_root, db=db_name, key=cc.EVAL_RESULTS)
    logger.info("Meta data from source file: %s" % str(meta["args"]))

    # Keys are 1-tuples because the concat below introduces exactly one new dimension.
    objectives = list(perf_ds)
    baselines = OrderedDict(((name,), compute_baseline(perf_ds[name])) for name in objectives)
    baseline_ds = ds_concat(baselines, dims=(cc.OBJECTIVE,))

    # Keep in same order for cleanliness
    baseline_ds = baseline_ds.sel({cc.OBJECTIVE: objectives})
    assert objectives == baseline_ds.coords[cc.OBJECTIVE].values.tolist()

    # Could optionally remove this once we think things have enough tests
    for (name,), expected in baselines.items():
        assert baseline_ds.sel({cc.OBJECTIVE: name}, drop=True).identical(expected)

    # Now dump the results
    XRSerializer.save_derived(baseline_ds, meta, db_root, db=db_name, key=cc.BASELINE)
def concat_experiments(all_experiments, ravel=False):
    """Aggregate the Datasets from a series of experiments into combined Dataset.

    Parameters
    ----------
    all_experiments : typing.Iterable
        Iterable (possibly from a generator) with the Datasets from each experiment. Each item in `all_experiments`
        is a pair containing ``(meta_data, data)``. The `meta_data` is unpacked as ``(test_case, optimizer, uuid)``
        and the `data` as ``(perf_ds, time_ds, suggest_ds, sig)``. See `load_experiments` for details on these
        variables.
    ravel : bool
        Deprecated and no longer supported: any true value raises `NotImplementedError`.

    Returns
    -------
    all_perf : :class:`xarray:xarray.Dataset`
        Dataset containing all of the `perf_ds` from the experiments. The meta-data from the experiments are included
        as extra dimensions. Every variable in `all_perf` has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)``.
        To convert the `uuid` to a trial, there must be an equal number of repetition in the experiments for each
        `TEST_CASE`, `METHOD` combination. Likewise, all of the experiments need an equal number of `ITER` and
        `SUGGEST`.
    all_time : :class:`xarray:xarray.Dataset`
        Dataset containing the `summarize_time` output for all of the `time_ds` from the experiments. Every variable
        has dimensions ``(ITER, TEST_CASE, METHOD, TRIAL)``.
    all_suggest : dict(str, :class:`xarray:xarray.Dataset`)
        Maps each test case to a Dataset containing all of the `suggest_ds` for that test case. Every variable has
        dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` where `TEST_CASE` is singleton.
    all_sigs : dict(str, list(list(float)))
        Aggregate of all experiment signatures.

    Raises
    ------
    NotImplementedError
        If `ravel` is true.
    """
    # `ravel` does not depend on the experiments, so reject it up front. Checking inside the loop would first pull an
    # item out of a (possibly expensive) generator, and would never trigger at all for an empty iterable.
    if ravel:
        raise NotImplementedError("ravel is deprecated. Just reshape in analysis steps instead.")

    all_perf = {}
    all_time = {}
    all_suggest = {}
    all_sigs = {}
    trial_counter = Counter()
    for (test_case, optimizer, _uuid), (perf_ds, time_ds, suggest_ds, sig) in all_experiments:
        # Trials are numbered in arrival order within each (test_case, optimizer) pair; the uuid is not used.
        case_key = (test_case, optimizer, trial_counter[(test_case, optimizer)])
        trial_counter[(test_case, optimizer)] += 1

        # Process perf data
        assert all(perf_ds[kk].dims == (ITER, SUGGEST) for kk in perf_ds)
        all_perf[case_key] = perf_ds

        # Process time data
        all_time[case_key] = summarize_time(time_ds)

        # Process suggestion data, these are kept separate per test case
        all_suggest_curr = all_suggest.setdefault(test_case, {})
        all_suggest_curr[case_key] = suggest_ds

        # Handle the signatures
        all_sigs.setdefault(test_case, []).append(sig)
    assert min(trial_counter.values()) == max(trial_counter.values()), "Uneven number of trials per test case"

    # Now need to concat dict of datasets into single dataset
    all_perf = xru.ds_concat(all_perf, dims=(TEST_CASE, METHOD, TRIAL))
    assert all(all_perf[kk].dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL) for kk in all_perf)
    assert not any(
        np.any(np.isnan(all_perf[kk].values)) for kk in all_perf
    ), "Missing combinations of method and test case"

    all_time = xru.ds_concat(all_time, dims=(TEST_CASE, METHOD, TRIAL))
    assert all(all_time[kk].dims == (ITER, TEST_CASE, METHOD, TRIAL) for kk in all_time)
    assert not any(np.any(np.isnan(all_time[kk].values)) for kk in all_time)
    assert xru.coord_compat((all_perf, all_time), (ITER, TEST_CASE, METHOD, TRIAL))

    # Each test case gets its own concatenated Dataset, replacing the dict of pieces collected above
    for test_case in all_suggest:
        all_suggest[test_case] = xru.ds_concat(all_suggest[test_case], dims=(TEST_CASE, METHOD, TRIAL))
        assert all(
            all_suggest[test_case][kk].dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
            for kk in all_suggest[test_case]
        )
        assert not any(np.any(np.isnan(all_suggest[test_case][kk].values)) for kk in all_suggest[test_case])
        assert xru.coord_compat((all_perf, all_suggest[test_case]), (ITER, METHOD, TRIAL))
        assert all_suggest[test_case].coords[TEST_CASE].shape == (1,), "test case should be singleton"

    return all_perf, all_time, all_suggest, all_sigs
def concat_experiments(all_experiments, ravel=False):
    """Combine the per-experiment results into one performance array and one timing dataset.

    Parameters
    ----------
    all_experiments : typing.Iterable
        Iterable (possibly a generator) of ``(meta_data, data)`` pairs, one per experiment. The `meta_data` is a
        `tuple` of `str` with ``test_case, optimizer, uuid``. The `data` is a tuple of ``(perf_da, time_ds, sig)``:
        `perf_da` is an :class:`xarray:xarray.DataArray` with the evaluation results and dimensions
        ``(ITER, SUGGEST)``; `time_ds` is an :class:`xarray:xarray.Dataset` with the timing results in the form
        accepted by `summarize_time`, with coordinates compatible with `perf_da`; `sig` is the `test_case` signature
        and must be `list(float)`.
    ravel : bool
        If true, ravel all studies to store batch suggestions as if they were serial. In that case the optimizer
        name is also extended with a ``p<n_suggest>`` tag via `str_join_safe`.

    Returns
    -------
    all_perf : :class:`xarray:xarray.DataArray`
        DataArray holding every `perf_da`, with the experiment meta-data added as extra dimensions, giving
        ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)``. Since the `uuid` is replaced by a running trial index, every
        `TEST_CASE`, `METHOD` combination needs the same number of repetitions, and all experiments need an equal
        number of `ITER` and `SUGGEST`. If `ravel` is true, then the `SUGGEST` is singleton.
    all_time : :class:`xarray:xarray.Dataset`
        Dataset holding the `summarize_time` output of every `time_ds`. Each variable has dimensions
        ``(ITER, TEST_CASE, METHOD, TRIAL)``.
    all_sigs : dict(str, list(list(float)))
        Signatures of all experiments, grouped by test case.
    """
    perf_by_case = {}
    time_by_case = {}
    all_sigs = {}
    trial_counter = Counter()

    for (test_case, optimizer, _uuid), (perf_da, time_ds, sig) in all_experiments:
        if ravel:
            # Read the batch size before raveling so it can be recorded in the optimizer name.
            n_suggest = perf_da.sizes[SUGGEST]
            perf_da = _ravel_perf(perf_da)
            time_ds = _ravel_time(time_ds)
            optimizer = str_join_safe(ARG_DELIM, (optimizer, "p%d" % n_suggest), append=True)

        # The trial index is the number of experiments already seen for this (test_case, optimizer) pair.
        pair = (test_case, optimizer)
        trial = trial_counter[pair]
        trial_counter[pair] = trial + 1
        case_key = (test_case, optimizer, trial)

        assert perf_da.dims == (ITER, SUGGEST)
        perf_by_case[case_key] = perf_da
        time_by_case[case_key] = summarize_time(time_ds)
        all_sigs.setdefault(test_case, []).append(sig)

    trial_counts = trial_counter.values()
    assert min(trial_counts) == max(trial_counts), "Uneven number of trials per test case"

    # Dimensions introduced by the concat, matching the layout of the case keys built above.
    key_dims = (TEST_CASE, METHOD, TRIAL)

    all_perf = xru.da_concat(perf_by_case, dims=key_dims)
    assert all_perf.dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
    assert not np.any(np.isnan(all_perf.values)), "Missing combinations of method and test case"

    all_time = xru.ds_concat(time_by_case, dims=key_dims)
    for name in all_time:
        assert all_time[name].dims == (ITER, TEST_CASE, METHOD, TRIAL)
    for name in all_time:
        assert not np.any(np.isnan(all_time[name].values))
    assert xru.coord_compat((all_perf, all_time), (ITER, TEST_CASE, METHOD, TRIAL))

    return all_perf, all_time, all_sigs