def should_stop_trials_early(
    self,
    trial_indices: Set[int],
    experiment: Experiment,
    **kwargs: Dict[str, Any],
) -> Dict[int, Optional[str]]:
    # Make sure that we can look up data for the trials,
    # even though we won't use it in this dummy strategy.
    data = experiment.lookup_data(trial_indices=trial_indices)
    if data.df.empty:
        raise Exception(
            f"No data found for trials {trial_indices}; "
            "can't determine whether or not to stop early."
        )
    return {idx: None for idx in trial_indices if idx % 2 == 1}
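
# Hedged usage sketch (illustrative only, not part of the strategy above): calls the
# dummy strategy on a set of running trials. `strategy` is an instance of the class
# defining `should_stop_trials_early`, and `experiment` is assumed to already have
# data attached for the listed trials; both names are placeholders.
def _example_apply_dummy_stopping(
    strategy, experiment: Experiment, running_trial_indices: Set[int]
) -> Dict[int, Optional[str]]:
    decisions = strategy.should_stop_trials_early(
        trial_indices=running_trial_indices, experiment=experiment
    )
    # With the dummy logic above, e.g. trial_indices={0, 1, 2, 3} yields
    # {1: None, 3: None}: stop the odd-indexed trials, with no reason message.
    return decisions
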
def _check_validity_and_get_data(self, experiment: Experiment) -> Optional[MapData]:
    """Performs validity checks and returns the `MapData` used for early stopping,
    or ``None`` if any check fails."""
    if experiment.optimization_config is None:
        raise UnsupportedError(  # pragma: no cover
            "Experiment must have an optimization config in order to use an "
            "early stopping strategy."
        )
    optimization_config = not_none(experiment.optimization_config)
    objective_name = optimization_config.objective.metric.name

    data = experiment.lookup_data()
    if data.df.empty:
        logger.info(
            f"{self.__class__.__name__} received empty data. "
            "Not stopping any trials."
        )
        return None
    if objective_name not in set(data.df["metric_name"]):
        logger.info(
            f"{self.__class__.__name__} did not receive data "
            "from the objective metric. Not stopping any trials."
        )
        return None

    if not isinstance(data, MapData):
        logger.info(
            f"{self.__class__.__name__} expects MapData, but the "
            f"data attached to experiment is of type {type(data)}. "
            "Not stopping any trials."
        )
        return None

    data = checked_cast(MapData, data)
    map_keys = data.map_keys
    if len(list(map_keys)) > 1:
        logger.info(
            f"{self.__class__.__name__} expects MapData with a single "
            "map key, but the data attached to the experiment has multiple: "
            f"{data.map_keys}. Not stopping any trials."
        )
        return None
    return data
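
# Hedged sketch of how a concrete strategy might build on the helper above
# (illustrative only): bail out early when validation fails, then inspect the single
# map key of the returned MapData. `self` stands in for an early stopping strategy
# instance that exposes `_check_validity_and_get_data`; the method body is a stub.
def _example_strategy_entry_point(
    self, trial_indices: Set[int], experiment: Experiment
) -> Dict[int, Optional[str]]:
    data = self._check_validity_and_get_data(experiment=experiment)
    if data is None:
        # Validation failed (no data, wrong data type, or missing objective);
        # suggest no early-stopping decisions.
        return {}
    map_key = next(iter(data.map_keys))  # exactly one map key after validation
    # ... strategy-specific logic over `data.df` and `map_key` would go here ...
    return {}
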
def get_pareto_optimal_parameters(
    experiment: Experiment,
    generation_strategy: GenerationStrategy,
    use_model_predictions: bool = True,
) -> Optional[Dict[int, Tuple[TParameterization, TModelPredictArm]]]:
    """Identifies the best parameterizations tried in the experiment so far,
    using model predictions if ``use_model_predictions`` is true and using
    observed values from the experiment otherwise. By default, uses model
    predictions to account for observation noise.

    NOTE: The format of this method's output is as follows:
    { trial_index --> (parameterization, (means, covariances)) },
    where means are a dictionary of form { metric_name --> metric_mean }
    and covariances are a nested dictionary of form
    { one_metric_name --> { another_metric_name: covariance } }.

    Args:
        experiment: Experiment, from which to find Pareto-optimal arms.
        generation_strategy: Generation strategy containing the modelbridge.
        use_model_predictions: Whether to extract the Pareto frontier using
            model predictions or directly observed values. If ``True``,
            the metric means and covariances in this method's output will
            also be based on model predictions and may differ from the
            observed values.

    Returns:
        ``None`` if it was not possible to extract the Pareto frontier,
        otherwise a mapping from trial index to the tuple of:
        - the parameterization of the arm in that trial,
        - two-item tuple of metric means dictionary and covariance matrix
          (model-predicted if ``use_model_predictions=True`` and observed
          otherwise).
    """
    # Validate aspects of the experiment: that it is a MOO experiment and
    # that the current model can be used to produce the Pareto frontier.
    if not not_none(experiment.optimization_config).is_moo_problem:
        raise UnsupportedError(
            "Please use `get_best_parameters` for single-objective problems."
        )
    moo_optimization_config = checked_cast(
        MultiObjectiveOptimizationConfig, experiment.optimization_config
    )
    if moo_optimization_config.outcome_constraints:
        # TODO[drfreund]: Test this flow and remove error.
        raise NotImplementedError(
            "Support for outcome constraints is currently under development."
        )

    # Extract or instantiate modelbridge to use for Pareto frontier extraction.
    mb = generation_strategy.model
    if mb is None or not isinstance(mb, MultiObjectiveTorchModelBridge):
        logger.info(
            "Can only extract a Pareto frontier using a multi-objective model bridge"
            f", but currently used model bridge is: {mb} of type {type(mb)}. Will "
            "use `Models.MOO` instead to extract Pareto frontier."
        )
        mb = checked_cast(
            MultiObjectiveTorchModelBridge,
            Models.MOO(
                experiment=experiment,
                data=checked_cast(Data, experiment.lookup_data()),
            ),
        )
    else:
        # Make sure the model is up-to-date with the most recent data.
        generation_strategy._set_or_update_current_model(data=None)

    # If objective thresholds are not specified in optimization config, extract
    # the inferred ones if possible or infer them anew if not.
    objective_thresholds_override = None
    if not moo_optimization_config.objective_thresholds:
        lgr = generation_strategy.last_generator_run
        if lgr and lgr.gen_metadata and "objective_thresholds" in lgr.gen_metadata:
            # Reuse the thresholds inferred during candidate generation.
            objective_thresholds_override = lgr.gen_metadata["objective_thresholds"]
        else:
            # No previously inferred thresholds available; infer them anew.
            objective_thresholds_override = mb.infer_objective_thresholds(
                search_space=experiment.search_space,
                optimization_config=experiment.optimization_config,
                fixed_features=None,
            )
        logger.info(
            f"Using inferred objective thresholds: {objective_thresholds_override}, "
            "as objective thresholds were not specified as part of the optimization "
            "configuration on the experiment."
        )

    # Extract the Pareto frontier and format it as follows:
    # { trial_index --> (parameterization, (means, covariances)) }
    pareto_util = predicted_pareto if use_model_predictions else observed_pareto
    pareto_optimal_observations = pareto_util(
        modelbridge=mb, objective_thresholds=objective_thresholds_override
    )
    return {
        int(not_none(obs.features.trial_index)): (
            obs.features.parameters,
            (obs.data.means_dict, obs.data.covariance_matrix),
        )
        for obs in pareto_optimal_observations
    }
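
# Hedged usage sketch (illustrative; `experiment` and `generation_strategy` are
# assumed to come from a running multi-objective optimization): extract the
# Pareto-optimal arms and unpack the documented output format.
def _example_report_pareto_frontier(
    experiment: Experiment, generation_strategy: GenerationStrategy
) -> None:
    pareto = get_pareto_optimal_parameters(
        experiment=experiment,
        generation_strategy=generation_strategy,
        use_model_predictions=True,  # use model-predicted (noise-adjusted) values
    )
    if not pareto:
        return
    for trial_index, (parameterization, (means, covariances)) in pareto.items():
        # `means` maps metric name -> mean; `covariances` is the nested
        # metric-by-metric covariance dictionary described in the docstring.
        print(trial_index, parameterization, means)
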
def get_best_from_model_predictions_with_trial_index(
    experiment: Experiment,
) -> Optional[Tuple[int, TParameterization, Optional[TModelPredictArm]]]:
    """Given an experiment, returns the best predicted parameterization and
    corresponding prediction based on the most recent Trial with predictions.
    If no trials have predictions, returns None.

    Only some models return predictions. For instance GPEI does, while Sobol
    does not.

    TModelPredictArm is of the form:
        ({metric_name: mean}, {metric_name_1: {metric_name_2: cov_1_2}})

    Args:
        experiment: Experiment, on which to identify best raw objective arm.

    Returns:
        Tuple of trial index, parameterization, and model predictions for it.
    """
    # pyre-ignore [16]
    if isinstance(experiment.optimization_config.objective, MultiObjective):
        logger.warning(
            "get_best_from_model_predictions is deprecated for multi-objective "
            "optimization configs. This method will return an arbitrary point on "
            "the Pareto frontier."
        )
    for idx, trial in sorted(
        experiment.trials.items(), key=lambda x: x[0], reverse=True
    ):
        gr = None
        if isinstance(trial, Trial):
            gr = trial.generator_run
        elif isinstance(trial, BatchTrial):
            if len(trial.generator_run_structs) > 0:
                # In theory batch_trial can have >1 gr, grab the first.
                gr = trial.generator_run_structs[0].generator_run
        if gr is not None and gr.best_arm_predictions is not None:  # pragma: no cover
            data = experiment.lookup_data()
            if not isinstance(data, Data):
                return _gr_to_prediction_with_trial_index(idx, gr)

            model = get_model_from_generator_run(
                generator_run=gr, experiment=experiment, data=data
            )

            # If model is not ArrayModelBridge, just use the best arm from the
            # last good generator run.
            if not isinstance(model, ArrayModelBridge):
                return _gr_to_prediction_with_trial_index(idx, gr)

            # Check to see if the model is worth using.
            cv_results = cross_validate(model=model)
            diagnostics = compute_diagnostics(result=cv_results)
            assess_model_fit_results = assess_model_fit(diagnostics=diagnostics)
            objective_name = experiment.optimization_config.objective.metric.name
            # If model fit is bad, use raw results.
            if (
                objective_name
                in assess_model_fit_results.bad_fit_metrics_to_fisher_score
            ):
                logger.warning(
                    "Model fit is poor; falling back on raw data for best point."
                )
                if not _is_all_noiseless(df=data.df, metric_name=objective_name):
                    logger.warning(
                        "Model fit is poor and data on objective metric "
                        f"{objective_name} is noisy; interpret best points "
                        "results carefully."
                    )
                return _get_best_poor_model_fit(experiment=experiment)

            res = model.model_best_point()
            if res is None:
                return _gr_to_prediction_with_trial_index(idx, gr)
            best_arm, best_arm_predictions = res
            return idx, not_none(best_arm).parameters, best_arm_predictions

    return None
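
# Hedged usage sketch (illustrative; assumes a single-objective `experiment` that has
# completed at least one model-generated trial): unpack the best point and its model
# predictions, handling the case where no trial has predictions yet.
def _example_print_best_predicted_point(experiment: Experiment) -> None:
    best = get_best_from_model_predictions_with_trial_index(experiment=experiment)
    if best is None:
        print("No trials with model predictions yet.")
        return
    trial_index, parameterization, predictions = best
    if predictions is not None:
        # TModelPredictArm is a (means, covariances) pair, per the docstring above.
        means, covariances = predictions
        print(f"Best point from trial {trial_index}: {parameterization} -> {means}")
    else:
        print(f"Best point from trial {trial_index}: {parameterization}")
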
def should_stop_trials_early(
    self,
    trial_indices: Set[int],
    experiment: Experiment,
    **kwargs: Dict[str, Any],
) -> Dict[int, Optional[str]]:
    """Stop a trial if its performance is in the bottom `percentile_threshold`
    of the trials at the same step.

    Args:
        trial_indices: Indices of candidate trials to consider for early stopping.
        experiment: Experiment that contains the trials and other contextual data.

    Returns:
        A dictionary mapping trial indices that should be early stopped to
        (optional) messages with the associated reason. An empty dictionary
        means no suggested updates to any trial's status.
    """
    if experiment.optimization_config is None:
        raise UnsupportedError(  # pragma: no cover
            "Experiment must have an optimization config in order to use an "
            "early stopping strategy."
        )
    optimization_config = not_none(experiment.optimization_config)
    objective_name = optimization_config.objective.metric.name
    minimize = optimization_config.objective.minimize

    data = experiment.lookup_data(keep_latest_map_values_only=False)
    if data.df.empty:
        logger.info(
            "PercentileEarlyStoppingStrategy received empty data. "
            "Not stopping any trials."
        )
        return {}

    if not isinstance(data, MapData):
        raise ValueError(
            "PercentileEarlyStoppingStrategy expects MapData, but the "
            f"data attached to experiment is of type {type(data)}."
        )

    map_keys = data.map_keys
    if len(map_keys) > 1:
        raise ValueError(  # pragma: no cover
            "PercentileEarlyStoppingStrategy expects MapData with a single "
            "map key, but the data attached to the experiment has multiple: "
            f"{data.map_keys}."
        )
    map_key = map_keys[0]

    df = data.df
    metric_to_aligned_means, _ = align_partial_results(
        df=df,
        progr_key=map_key,
        metrics=[objective_name],
    )
    aligned_means = metric_to_aligned_means[objective_name]
    decisions = {
        trial_index: self.should_stop_trial_early(
            trial_index=trial_index,
            experiment=experiment,
            df=aligned_means,
            percentile_threshold=self.percentile_threshold,
            map_key=map_key,
            minimize=minimize,
        )
        for trial_index in trial_indices
    }
    return {
        trial_index: reason
        for trial_index, (should_stop, reason) in decisions.items()
        if should_stop
    }
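
# Hedged usage sketch (illustrative; assumes `PercentileEarlyStoppingStrategy` can be
# constructed with a `percentile_threshold` argument and that `experiment` has MapData
# with a single progression key attached): stop trials whose performance is in the
# bottom `percentile_threshold` of trials at the same step, per the docstring above.
def _example_percentile_stopping(
    experiment: Experiment, running_trial_indices: Set[int]
) -> Dict[int, Optional[str]]:
    strategy = PercentileEarlyStoppingStrategy(percentile_threshold=75)
    return strategy.should_stop_trials_early(
        trial_indices=running_trial_indices, experiment=experiment
    )
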
def get_standard_plots(
    experiment: Experiment,
    model: Optional[ModelBridge],
    data: Optional[Union[Data, MapData]] = None,
    model_transitions: Optional[List[int]] = None,
) -> List[go.Figure]:
    """Extract standard plots for single-objective optimization.

    Extracts a list of plots from an ``Experiment`` and ``ModelBridge`` of
    general interest to an Ax user. Currently not supported are
    - TODO: multi-objective optimization
    - TODO: ChoiceParameter plots

    Args:
        experiment: The ``Experiment`` from which to obtain standard plots.
        model: The ``ModelBridge`` used to suggest trial parameters.
        data: If specified, data to which to fit the model before generating plots.
        model_transitions: The arm numbers at which shifts in generation_strategy
            occur.

    Returns:
        - a plot of objective value vs. trial index, to show experiment progression
        - a plot of objective value vs. range parameter values, only included if the
          model associated with generation_strategy can create predictions. This
          consists of:

            - a plot_slice plot if the search space contains one range parameter
            - an interact_contour plot if the search space contains multiple
              range parameters
    """
    objective = not_none(experiment.optimization_config).objective
    if isinstance(objective, ScalarizedObjective):
        logger.warning(
            "get_standard_plots does not currently support ScalarizedObjective "
            "optimization experiments. Returning an empty list."
        )
        return []

    if data is None:
        data = experiment.lookup_data()
    if isinstance(data, MapData):
        data = data.deduplicate_data()

    if data.df.empty:
        logger.info(
            f"Experiment {experiment} does not yet have data, nothing to plot."
        )
        return []

    output_plot_list = []
    output_plot_list.append(
        _get_objective_trace_plot(
            experiment=experiment,
            data=checked_cast(Data, data)
            if isinstance(data, Data)
            else checked_cast(MapData, data),
            model_transitions=model_transitions
            if model_transitions is not None
            else [],
        )
    )

    # Objective vs. parameter plot requires a `Model`, so add it only if model
    # is already available. In cases where initially custom trials are attached,
    # model might not yet be set on the generation strategy.
    if model:
        # TODO: Check if model can predict in favor of try/catch.
        try:
            output_plot_list.extend(
                _get_objective_v_param_plots(
                    experiment=experiment,
                    model=model,
                )
            )
            output_plot_list.extend(_get_cross_validation_plots(model=model))
            feature_importance_plot = plot_feature_importance_by_feature_plotly(
                model=model, relative=False, caption=feature_importance_caption
            )
            feature_importance_plot.layout.title = "[ADVANCED] " + str(
                # pyre-fixme[16]: go.Figure has no attribute `layout`
                feature_importance_plot.layout.title.text
            )
            output_plot_list.append(feature_importance_plot)
        except NotImplementedError:
            # Model does not implement `predict` method.
            pass

    return [plot for plot in output_plot_list if plot is not None]
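
# Hedged usage sketch (illustrative; `generation_strategy` is assumed to hold the
# currently fitted model, and rendering is shown with plotly's standard `Figure.show`
# rather than an Ax-specific helper): build the standard plots and display them.
def _example_show_standard_plots(experiment: Experiment, generation_strategy) -> None:
    figures = get_standard_plots(
        experiment=experiment,
        model=generation_strategy.model,  # may be None before the first model fit
    )
    for figure in figures:
        figure.show()  # go.Figure.show() renders in a notebook or browser
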