def aggregate_problem_results(
    runs: Dict[str, List[Experiment]],
    problem: BenchmarkProblem,
    # Model transitions, can be obtained as `generation_strategy.model_transitions`.
    model_transitions: Optional[Dict[str, List[int]]] = None,
) -> BenchmarkResult:
    # Results will be collected into {method -> results} dictionaries.
    objective_at_true_best: Dict[str, List[np.ndarray]] = {}
    fit_times: Dict[str, List[float]] = {}
    gen_times: Dict[str, List[float]] = {}
    for method, experiments in runs.items():
        objective_at_true_best[method] = []
        fit_times[method] = []
        gen_times[method] = []
        for experiment in experiments:
            assert (
                problem.name in experiment.name
            ), "Problem and experiment name do not match."
            fit_time, gen_time = get_model_times(experiment=experiment)
            true_best_objective = extract_optimization_trace(
                experiment=experiment, problem=problem
            )
            # Compute the quantities we care about:
            # 1. True best objective value.
            objective_at_true_best[method].append(true_best_objective)
            # 2. Fit and generation times.
            fit_times[method].append(fit_time)
            gen_times[method].append(gen_time)
            # TODO: If `evaluate_suggested` is True on the problem, also compute:
            # 3. True objective value of the model-predicted best.
            # 4. True feasibility of the model-predicted best.
            # 5. Model prediction MSE for each generation run.

    # TODO: Remove rows from <values>[method] whose length differs from the
    # length of the other rows; log a warning when removing.
    return BenchmarkResult(
        objective_at_true_best={
            m: np.array(v) for m, v in objective_at_true_best.items()
        },
        # pyre-fixme[6]: Expected `Optional[Dict[str, Optional[List[int]]]]`
        #  but got `Optional[Dict[str, List[int]]]`.
        model_transitions=model_transitions,
        optimum=problem.optimal_value,
        fit_times=fit_times,
        gen_times=gen_times,
    )
def aggregate_problem_results(
    runs: Dict[str, List[Experiment]],
    problem: BenchmarkProblem,
    # Model transitions, can be obtained as `generation_strategy.model_transitions`.
    model_transitions: Optional[Dict[str, List[int]]] = None,
    is_asynchronous: bool = False,
    **kwargs,
) -> BenchmarkResult:
    # Results will be collected into {method -> results} dictionaries.
    true_performances: Dict[str, List[np.ndarray]] = {}
    fit_times: Dict[str, List[float]] = {}
    gen_times: Dict[str, List[float]] = {}

    # Inspect one experiment to determine whether this is a multi-objective
    # problem; Pareto frontiers are only collected for bi-objective problems.
    exp = list(runs.values())[0][0]
    is_moo = isinstance(exp.optimization_config, MultiObjectiveOptimizationConfig)
    plot_pfs = (
        is_moo
        and len(not_none(exp.optimization_config).objective.metrics) == 2
    )
    pareto_frontiers = {} if plot_pfs else None

    for method, experiments in runs.items():
        true_performances[method] = []
        fit_times[method] = []
        gen_times[method] = []
        for experiment in experiments:
            assert (
                problem.name in experiment.name
            ), "Problem and experiment name do not match."
            fit_time, gen_time = get_model_times(experiment=experiment)
            true_performance = extract_optimization_trace(
                experiment=experiment,
                problem=problem,
                is_asynchronous=is_asynchronous,
                **kwargs,
            )
            # Compute the quantities we care about:
            # 1. True best objective value.
            true_performances[method].append(true_performance)
            # 2. Fit and generation times.
            fit_times[method].append(fit_time)
            gen_times[method].append(gen_time)
            # TODO: If `evaluate_suggested` is True on the problem, also compute:
            # 3. True objective value of the model-predicted best.
            # 4. True feasibility of the model-predicted best.
            # 5. Model prediction MSE for each generation run.

        # Only include the Pareto frontier for one experiment per method.
        if plot_pfs:
            # pyre-ignore [16]
            pareto_frontiers[method] = get_observed_pareto_frontiers(
                experiment=experiment,
                # pyre-ignore [6]
                data=experiment.fetch_data(),
            )[0]

    # TODO: Remove rows from <values>[method] whose length differs from the
    # length of the other rows; log a warning when removing.
    return BenchmarkResult(
        true_performance={m: np.array(v) for m, v in true_performances.items()},
        # pyre-fixme[6]: Expected `Optional[Dict[str, Optional[List[int]]]]`
        #  but got `Optional[Dict[str, List[int]]]`.
        model_transitions=model_transitions,
        optimum=problem.optimal_value,
        fit_times=fit_times,
        gen_times=gen_times,
        is_multi_objective=is_moo,
        pareto_frontiers=pareto_frontiers,
    )
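
# Illustrative usage sketch (not part of the original module). It assumes the
# caller has already produced one or more completed `Experiment`s per method,
# e.g. several replications of the same `BenchmarkProblem`; the function and
# parameter names below (`_example_aggregate_usage`, `completed_runs`,
# `transitions`) are hypothetical.
def _example_aggregate_usage(
    completed_runs: Dict[str, List[Experiment]],
    problem: BenchmarkProblem,
    transitions: Optional[Dict[str, List[int]]] = None,
) -> BenchmarkResult:
    # `completed_runs` maps a method name (e.g. "Sobol" or "GPEI") to the list
    # of replication experiments run on `problem` with that method.
    result = aggregate_problem_results(
        runs=completed_runs,
        problem=problem,
        model_transitions=transitions,
    )
    # `result.true_performance[method]` stacks the per-replication optimization
    # traces into a single array; `result.fit_times` / `result.gen_times` hold
    # the corresponding model-fitting and candidate-generation times.
    return result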