Example #1
    def testIsFactorial(self):
        self.assertFalse(self.batch.is_factorial)

        # Insufficient factors
        small_experiment = Experiment(
            name="small_test",
            search_space=SearchSpace(
                [FixedParameter("a", ParameterType.INT, 4)]),
        )
        small_trial = small_experiment.new_batch_trial().add_arm(Arm({"a": 4}))
        self.assertFalse(small_trial.is_factorial)

        new_batch_trial = self.experiment.new_batch_trial()
        new_batch_trial.add_arms_and_weights(arms=[
            Arm(parameters={
                "w": 0.75,
                "x": 1,
                "y": "foo",
                "z": True
            }),
            Arm(parameters={
                "w": 0.75,
                "x": 2,
                "y": "foo",
                "z": True
            }),
            Arm(parameters={
                "w": 0.77,
                "x": 1,
                "y": "foo",
                "z": True
            }),
        ])
        self.assertFalse(new_batch_trial.is_factorial)

        new_batch_trial = self.experiment.new_batch_trial()
        new_batch_trial.add_arms_and_weights(arms=[
            Arm(parameters={
                "w": 0.77,
                "x": 1,
                "y": "foo",
                "z": True
            }),
            Arm(parameters={
                "w": 0.77,
                "x": 2,
                "y": "foo",
                "z": True
            }),
            Arm(parameters={
                "w": 0.75,
                "x": 1,
                "y": "foo",
                "z": True
            }),
            Arm(parameters={
                "w": 0.75,
                "x": 2,
                "y": "foo",
                "z": True
            }),
        ])
        self.assertTrue(new_batch_trial.is_factorial)
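Conceptually, a batch is factorial when its arms form the full cross-product of the parameter values appearing in the batch. A minimal standalone sketch of that property, assuming these semantics and ignoring Ax's handling of the "Insufficient factors" case above (Ax's real implementation may differ):

from itertools import product
from typing import Any, Dict, List

def is_full_factorial(arms: List[Dict[str, Any]]) -> bool:
    """True iff the arms cover every combination of the distinct
    values observed for each parameter."""
    if not arms:
        return False
    names = sorted(arms[0])
    levels = {n: {str(arm[n]) for arm in arms} for n in names}
    observed = {tuple(str(arm[n]) for n in names) for arm in arms}
    expected = set(product(*(levels[n] for n in names)))
    return observed == expected

# The three-arm batch above misses (w=0.77, x=2), so it is not factorial;
# the four-arm batch covers all 2x2 combinations of w and x.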
Example #2
def experiment_from_json(object_json: Dict[str, Any]) -> Experiment:
    """Load Ax Experiment from JSON."""
    time_created_json = object_json.pop("time_created")
    trials_json = object_json.pop("trials")
    experiment_type_json = object_json.pop("experiment_type")
    data_by_trial_json = object_json.pop("data_by_trial")
    experiment = Experiment(
        **{k: object_from_json(v)
           for k, v in object_json.items()})
    experiment._time_created = object_from_json(time_created_json)
    experiment._trials = trials_from_json(experiment, trials_json)
    experiment._arms_by_name = {}
    for trial in experiment._trials.values():
        for arm in trial.arms:
            experiment._register_arm(arm)
        if trial.ttl_seconds is not None:
            experiment._trials_have_ttl = True
    if experiment.status_quo is not None:
        sq = not_none(experiment.status_quo)
        experiment._register_arm(sq)
    experiment._experiment_type = object_from_json(experiment_type_json)
    experiment._data_by_trial = data_from_json(data_by_trial_json)
    return experiment
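A hedged usage sketch: deserialize an experiment previously saved with the matching Ax JSON encoder. The file name and the encoder/decoder symmetry are assumptions.

import json

# Assumes "experiment.json" holds the dict produced by the matching
# Ax JSON encoder; the key layout must match what the decoder pops above.
with open("experiment.json") as f:
    experiment = experiment_from_json(json.load(f))
print(experiment.name, len(experiment.trials))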
Example #3
def _benchmark_replication_Async_Scheduler(
    problem: BenchmarkProblem,
    method: GenerationStrategy,
    num_trials: int,
    experiment_name: str,
    batch_size: int = 1,
    raise_all_exceptions: bool = False,
    benchmark_trial: FunctionType = benchmark_trial,
    verbose_logging: bool = True,
    # Number of trials that need to fail for a replication to be considered failed.
    failed_trials_tolerated: int = 5,
    async_benchmark_options: Optional[AsyncBenchmarkOptions] = None,
) -> Tuple[Experiment, List[Exception]]:
    """Run a benchmark replication with asynchronous evaluations through Scheduler.
    The Scheduler interacts with a BackendSimulator.
    """
    if async_benchmark_options is None:
        raise NonRetryableBenchmarkingError(
            "`async_benchmark_options` required for Scheduler benchmarks."
        )

    backend_options = (
        async_benchmark_options.backend_options
        or BackendSimulatorOptions(
            internal_clock=0.0,
            max_concurrency=async_benchmark_options.max_pending_trials,
        )
    )
    backend_simulator = BackendSimulator(
        options=backend_options, verbose_logging=verbose_logging
    )

    experiment = Experiment(
        name=experiment_name,
        search_space=problem.search_space,
        optimization_config=problem.optimization_config,
        runner=SimulatedBackendRunner(
            simulator=backend_simulator,
            sample_runtime_func=async_benchmark_options.sample_runtime_func,
        ),
    )

    scheduler_options = async_benchmark_options.scheduler_options or SchedulerOptions(
        total_trials=None,
        init_seconds_between_polls=1,
        min_seconds_before_poll=1.0,
        seconds_between_polls_backoff_factor=1.0,
        logging_level=logging.INFO if verbose_logging else logging.WARNING,
        early_stopping_strategy=async_benchmark_options.early_stopping_strategy,
    )

    scheduler = AsyncSimulatedBackendScheduler(
        experiment=experiment,
        generation_strategy=method,
        max_pending_trials=async_benchmark_options.max_pending_trials,
        options=scheduler_options,
    )
    scheduler.run_n_trials(
        max_trials=num_trials, timeout_hours=async_benchmark_options.timeout_hours
    )

    # update the trial metadata with start time
    # Note: we could also do it in the BackendSimulator if it got access to the Trial
    for sim_trial in backend_simulator._completed:
        metadata_dict = {
            "start_time": sim_trial.sim_start_time,
            "queued_time": sim_trial.sim_queued_time,
            "completed_time": sim_trial.sim_completed_time,
        }
        experiment.trials[sim_trial.trial_index].update_run_metadata(metadata_dict)
    return experiment, []
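A hedged invocation sketch; `problem`, `strategy`, and the `AsyncBenchmarkOptions` constructor arguments are placeholders inferred from the signature above, not verified API.

# Placeholder setup: `problem` is a BenchmarkProblem and `strategy` a
# GenerationStrategy; the AsyncBenchmarkOptions fields are assumptions.
options = AsyncBenchmarkOptions(max_pending_trials=2)
experiment, exceptions = _benchmark_replication_Async_Scheduler(
    problem=problem,
    method=strategy,
    num_trials=20,
    experiment_name="async_replication_0",
    async_benchmark_options=options,
)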
Example #4
def get_best_from_model_predictions_with_trial_index(
    experiment: Experiment,
) -> Optional[Tuple[int, TParameterization, Optional[TModelPredictArm]]]:
    """Given an experiment, returns the best predicted parameterization and corresponding
    prediction based on the most recent Trial with predictions. If no trials have
    predictions returns None.

    Only some models return predictions. For instance GPEI does while Sobol does not.

    TModelPredictArm is of the form:
        ({metric_name: mean}, {metric_name_1: {metric_name_2: cov_1_2}})

    Args:
        experiment: Experiment, on which to identify best raw objective arm.

    Returns:
        Tuple of parameterization and model predictions for it.
    """
    # pyre-ignore [16]
    if isinstance(experiment.optimization_config.objective, MultiObjective):
        logger.warning(
            "get_best_from_model_predictions is deprecated for multi-objective "
            "optimization configs. This method will return an arbitrary point on "
            "the pareto frontier.")
    for idx, trial in sorted(experiment.trials.items(),
                             key=lambda x: x[0],
                             reverse=True):
        gr = None
        if isinstance(trial, Trial):
            gr = trial.generator_run
        elif isinstance(trial, BatchTrial):
            if len(trial.generator_run_structs) > 0:
                # In theory batch_trial can have >1 gr, grab the first
                gr = trial.generator_run_structs[0].generator_run

        if gr is not None and gr.best_arm_predictions is not None:  # pragma: no cover
            data = experiment.lookup_data()
            if not isinstance(data, Data):
                return _gr_to_prediction_with_trial_index(idx, gr)

            model = get_model_from_generator_run(generator_run=gr,
                                                 experiment=experiment,
                                                 data=data)

            # If model is not ArrayModelBridge, just use the best arm from the
            # last good generator run
            if not isinstance(model, ArrayModelBridge):
                return _gr_to_prediction_with_trial_index(idx, gr)

            # Check to see if the model is worth using
            cv_results = cross_validate(model=model)
            diagnostics = compute_diagnostics(result=cv_results)
            assess_model_fit_results = assess_model_fit(
                diagnostics=diagnostics)
            objective_name = experiment.optimization_config.objective.metric.name
            # If model fit is bad use raw results
            if (objective_name in
                    assess_model_fit_results.bad_fit_metrics_to_fisher_score):
                logger.warning(
                    "Model fit is poor; falling back on raw data for best point."
                )

                if not _is_all_noiseless(df=data.df,
                                         metric_name=objective_name):
                    logger.warning(
                        "Model fit is poor and data on objective metric " +
                        f"{objective_name} is noisy; interpret best points " +
                        "results carefully.")

                return _get_best_poor_model_fit(experiment=experiment)

            res = model.model_best_point()
            if res is None:
                return _gr_to_prediction_with_trial_index(idx, gr)

            best_arm, best_arm_predictions = res

            return idx, not_none(best_arm).parameters, best_arm_predictions

    return None
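Callers should guard against the `None` case and unpack the `TModelPredictArm` structure documented in the docstring; a short usage sketch:

result = get_best_from_model_predictions_with_trial_index(experiment)
if result is not None:
    trial_index, parameters, predictions = result
    if predictions is not None:
        # ({metric_name: mean}, {metric_name_1: {metric_name_2: cov_1_2}})
        means, covariances = predictions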
Example #5
    def _init_experiment_from_sqa(self, experiment_sqa: SQAExperiment) -> Experiment:
        """First step of conversion within experiment_from_sqa."""
        opt_config, tracking_metrics = self.opt_config_and_tracking_metrics_from_sqa(
            metrics_sqa=experiment_sqa.metrics
        )
        search_space = self.search_space_from_sqa(
            parameters_sqa=experiment_sqa.parameters,
            parameter_constraints_sqa=experiment_sqa.parameter_constraints,
        )
        if search_space is None:
            raise SQADecodeError(  # pragma: no cover
                "Experiment SearchSpace cannot be None."
            )
        status_quo = (
            Arm(
                # pyre-fixme[6]: Expected `Dict[str, Optional[Union[bool, float,
                #  int, str]]]` for 1st param but got `Optional[Dict[str,
                #  Optional[Union[bool, float, int, str]]]]`.
                parameters=experiment_sqa.status_quo_parameters,
                name=experiment_sqa.status_quo_name,
            )
            if experiment_sqa.status_quo_parameters is not None
            else None
        )
        if len(experiment_sqa.runners) == 0:
            runner = None
        elif len(experiment_sqa.runners) == 1:
            runner = self.runner_from_sqa(experiment_sqa.runners[0])
        else:
            raise ValueError(  # pragma: no cover
                "Multiple runners on experiment "
                "only supported for MultiTypeExperiment."
            )

        subclass = (experiment_sqa.properties or {}).get("subclass")
        if subclass == "SimpleExperiment":
            if opt_config is None:
                raise SQADecodeError(  # pragma: no cover
                    "SimpleExperiment must have an optimization config."
                )
            experiment = SimpleExperiment(
                name=experiment_sqa.name,
                search_space=search_space,
                objective_name=opt_config.objective.metric.name,
                minimize=opt_config.objective.minimize,
                outcome_constraints=opt_config.outcome_constraints,
                status_quo=status_quo,
            )
            experiment.description = experiment_sqa.description
            experiment.is_test = experiment_sqa.is_test
        else:
            experiment = Experiment(
                name=experiment_sqa.name,
                description=experiment_sqa.description,
                search_space=search_space,
                optimization_config=opt_config,
                tracking_metrics=tracking_metrics,
                runner=runner,
                status_quo=status_quo,
                is_test=experiment_sqa.is_test,
            )
        return experiment
Example #6
def compute_pareto_frontier(
    experiment: Experiment,
    primary_objective: Metric,
    secondary_objective: Metric,
    data: Optional[Data] = None,
    outcome_constraints: Optional[List[OutcomeConstraint]] = None,
    absolute_metrics: Optional[List[str]] = None,
    num_points: int = 10,
    trial_index: Optional[int] = None,
    chebyshev: bool = True,
) -> ParetoFrontierResults:
    """Compute the Pareto frontier between two objectives. For experiments
    with batch trials, a trial index or data object must be provided.

    Args:
        experiment: The experiment to compute a pareto frontier for.
        primary_objective: The primary objective to optimize.
        secondary_objective: The secondary objective against which
            to trade off the primary objective.
        outcome_constraints: Outcome
            constraints to be respected by the optimization. Can only contain
            constraints on metrics that are not primary or secondary objectives.
        absolute_metrics: List of outcome metrics that
            should NOT be relativized w.r.t. the status quo (all other outcomes
            will be in % relative to status_quo).
        num_points: The number of points to compute on the
            Pareto frontier.
        chebyshev: Whether to use augmented_chebyshev_scalarization
            when computing Pareto Frontier points.

    Returns:
        ParetoFrontierResults: A NamedTuple with the following fields:
            - param_dicts: The parameter dicts of the
                points generated on the Pareto Frontier.
            - means: The posterior mean predictions of
                the model for each metric (same order as the param dicts).
            - sems: The posterior sem predictions of
                the model for each metric (same order as the param dicts).
            - primary_metric: The name of the primary metric.
            - secondary_metric: The name of the secondary metric.
            - absolute_metrics: List of outcome metrics that
                are NOT relativized w.r.t. the status quo (all other metrics
                are in % relative to status_quo).
    """
    # TODO(jej): Implement using MultiObjectiveTorchModelBridge's _pareto_frontier
    model_gen_options = {
        "acquisition_function_kwargs": {
            "chebyshev_scalarization": chebyshev
        }
    }

    if (trial_index is None and data is None and any(
            isinstance(t, BatchTrial) for t in experiment.trials.values())):
        raise UnsupportedError(
            "Must specify trial index or data for experiment with batch trials"
        )
    absolute_metrics = [] if absolute_metrics is None else absolute_metrics
    for metric in absolute_metrics:
        if metric not in experiment.metrics:
            raise ValueError(f"Model was not fit on metric `{metric}`")

    if outcome_constraints is None:
        outcome_constraints = []
    else:
        # ensure we don't constrain an objective
        _validate_outcome_constraints(
            outcome_constraints=outcome_constraints,
            primary_objective=primary_objective,
            secondary_objective=secondary_objective,
        )

    # build posterior mean model
    if not data:
        try:
            data = (experiment.trials[trial_index].fetch_data()
                    if trial_index is not None else experiment.fetch_data())
        except Exception as e:
            logger.info(f"Could not fetch data from experiment or trial: {e}")

    oc = _build_new_optimization_config(
        weights=np.array([0.5, 0.5]),
        primary_objective=primary_objective,
        secondary_objective=secondary_objective,
        outcome_constraints=outcome_constraints,
    )
    model = Models.MOO(
        experiment=experiment,
        data=data,
        acqf_constructor=get_PosteriorMean,
        optimization_config=oc,
    )

    status_quo = experiment.status_quo
    if status_quo:
        try:
            status_quo_prediction = model.predict([
                ObservationFeatures(
                    parameters=status_quo.parameters,
                    # pyre-fixme [6]: Expected `Optional[np.int64]` for trial_index
                    trial_index=trial_index,
                )
            ])
        except ValueError as e:
            logger.warning(f"Could not predict OOD status_quo outcomes: {e}")
            status_quo = None
            status_quo_prediction = None
    else:
        status_quo_prediction = None

    param_dicts: List[TParameterization] = []

    # Construct weightings with linear angular spacing.
    # TODO: Verify whether 0, 1 weights cause problems because of subset_model.
    alpha = np.linspace(0 + 0.01, np.pi / 2 - 0.01, num_points)
    primary_weight = (-1 if primary_objective.lower_is_better else
                      1) * np.cos(alpha)
    secondary_weight = (-1 if secondary_objective.lower_is_better else
                        1) * np.sin(alpha)
    weights_list = np.stack([primary_weight, secondary_weight]).transpose()
    for weights in weights_list:
        oc = _build_new_optimization_config(
            weights=weights,
            primary_objective=primary_objective,
            secondary_objective=secondary_objective,
            outcome_constraints=outcome_constraints,
        )
        # TODO: (jej) T64002590 Let this serve as a starting point for optimization.
        # ex. Add global spacing criterion. Implement on BoTorch side.
        # pyre-fixme [6]: Expected different type for model_gen_options
        run = model.gen(1,
                        model_gen_options=model_gen_options,
                        optimization_config=oc)
        param_dicts.append(run.arms[0].parameters)

    # Call predict on points to get their decomposed metrics.
    means, cov = model.predict(
        [ObservationFeatures(parameters) for parameters in param_dicts])

    return _extract_pareto_frontier_results(
        param_dicts=param_dicts,
        means=means,
        variances=cov,
        primary_metric=primary_objective.name,
        secondary_metric=secondary_objective.name,
        absolute_metrics=absolute_metrics,
        outcome_constraints=outcome_constraints,
        status_quo_prediction=status_quo_prediction,
    )
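A hedged usage sketch; the metric names are placeholders and must already be tracked on the experiment (the `absolute_metrics` check above enforces this):

# Placeholder metrics; for batch-trial experiments, pass trial_index or data.
frontier = compute_pareto_frontier(
    experiment=experiment,
    primary_objective=Metric(name="revenue"),
    secondary_objective=Metric(name="latency"),
    absolute_metrics=["revenue", "latency"],
    num_points=20,
)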
Example #7
def make_experiment(
    parameters: List[TParameterRepresentation],
    name: Optional[str] = None,
    parameter_constraints: Optional[List[str]] = None,
    outcome_constraints: Optional[List[str]] = None,
    status_quo: Optional[TParameterization] = None,
    experiment_type: Optional[str] = None,
    # Single-objective optimization arguments:
    objective_name: Optional[str] = None,
    minimize: bool = False,
    # Multi-objective optimization arguments:
    objectives: Optional[Dict[str, str]] = None,
    objective_thresholds: Optional[List[str]] = None,
) -> Experiment:
    """Instantiation wrapper that allows for Ax `Experiment` creation
    without importing or instantiating any Ax classes.

    Args:
        parameters: List of dictionaries representing parameters in the
            experiment search space.
            Required elements in the dictionaries are:
            1. "name" (name of parameter, string),
            2. "type" (type of parameter: "range", "fixed", or "choice", string),
            and one of the following:
            3a. "bounds" for range parameters (list of two values, lower bound
            first),
            3b. "values" for choice parameters (list of values), or
            3c. "value" for fixed parameters (single value).
            Optional elements are:
            1. "log_scale" (for float-valued range parameters, bool),
            2. "value_type" (to specify type that values of this parameter should
            take; expects "float", "int", "bool" or "str"),
            3. "is_fidelity" (bool) and "target_value" (float) for fidelity
            parameters,
            4. "is_ordered" (bool) for choice parameters,
            5. "is_task" (bool) for task parameters, and
            6. "digits" (int) for float-valued range parameters.
        name: Name of the experiment to be created.
        parameter_constraints: List of string representation of parameter
            constraints, such as "x3 >= x4" or "-x3 + 2*x4 - 3.5*x5 >= 2". For
            the latter constraints, any number of arguments is accepted, and
            acceptable operators are "<=" and ">=".
        outcome_constraints: List of string representation of outcome
            constraints of form "metric_name >= bound", like "m1 <= 3."
        status_quo: Parameterization of the current state of the system.
            If set, this will be added to each trial to be evaluated alongside
            test configurations.
        experiment_type: String indicating type of the experiment (e.g. name of
            a product in which it is used), if any.
        objective_name: Name of the metric used as objective in this experiment,
            if experiment is single-objective optimization.
        minimize: Whether this experiment represents a minimization problem, if
            experiment is a single-objective optimization.
        objectives: Mapping from an objective name to "minimize" or "maximize"
            representing the direction for that objective. Used only for
            multi-objective optimization experiments.
        objective_thresholds: A list of objective threshold constraints for multi-
            objective optimization, in the same string format as `outcome_constraints`
            argument.
    """
    if objective_name is not None and (objectives is not None
                                       or objective_thresholds is not None):
        raise UnsupportedError(
            "Ambiguous objective definition: for single-objective optimization "
            "`objective_name` and `minimize` arguments expected. For multi-objective "
            "optimization `objectives` and `objective_thresholds` arguments expected."
        )

    status_quo_arm = None if status_quo is None else Arm(parameters=status_quo)

    if objectives is None:
        optimization_config = OptimizationConfig(
            objective=Objective(
                metric=Metric(
                    name=objective_name or DEFAULT_OBJECTIVE_NAME,
                    lower_is_better=minimize,
                ),
                minimize=minimize,
            ),
            outcome_constraints=make_outcome_constraints(
                outcome_constraints or [], status_quo_arm is not None),
        )
    else:
        optimization_config = make_optimization_config(
            objectives,
            objective_thresholds or [],
            outcome_constraints or [],
            status_quo_arm is not None,
        )

    return Experiment(
        name=name,
        search_space=make_search_space(parameters, parameter_constraints
                                       or []),
        optimization_config=optimization_config,
        status_quo=status_quo_arm,
        experiment_type=experiment_type,
    )
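A usage sketch built from the parameter-dictionary format spelled out in the docstring (names and values here are illustrative only):

experiment = make_experiment(
    name="quickstart",
    parameters=[
        {"name": "lr", "type": "range", "bounds": [1e-5, 1e-1],
         "log_scale": True},
        {"name": "batch_size", "type": "choice", "values": [32, 64, 128]},
        {"name": "optimizer", "type": "fixed", "value": "adam"},
    ],
    objective_name="accuracy",
    minimize=False,
)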
Example #8
def _extract_asynchronous_optimization_trace(
    experiment: Experiment,
    start_time: float,
    end_time: float,
    delta_t: float,
    completed_time_key: str,
    include_only_completed_trials: bool,
) -> np.ndarray:
    """Extract optimization trace for an asynchronous benchmark run. This involves
    getting the `completed_time` from the trial `run_metadata`, as described by
    the `completed_time_key`. From the `start_time`, `end_time`, and `delta_t`
    arguments, a sequence of times is constructed. The returned optimization trace
    is the best achieved value so far for each time, amongst completed (or early
    stopped) trials.

    Args:
        experiment: The experiment from which to generate results.
        start_time: The starting time.
        end_time: The ending time.
        delta_t: The increment between successive time points.
        completed_time_key: The key from which we look up completed run times
            from trial `run_metadata`.
        include_only_completed_trials: Include results only from completed trials.
            This will ignore trials that were early stopped.

    Returns:
        An array representing the optimization trace as a function of time.
    """
    if any(
            isinstance(trial, BatchTrial)
            for trial in experiment.trials.values()):
        raise NotImplementedError("Batched trials are not yet supported.")

    def get_completed_time(row):
        time = experiment.trials[
            row.trial_index].run_metadata[completed_time_key]
        return pd.Series({"completed_time": time})

    if include_only_completed_trials:
        completed_trials = experiment.trial_indices_by_status[
            TrialStatus.COMPLETED]
        data_df = experiment.fetch_trials_data(trial_indices=completed_trials,
                                               noisy=False).df
    else:
        data_df = experiment.fetch_data(noisy=False).df

    minimize = experiment.optimization_config.objective.minimize  # pyre-ignore[16]
    num_periods_running = int((end_time - start_time) // delta_t + 1)
    # TODO: Currently, the timestamps generated below must exactly match the
    # `completed_time` column
    iters_df = pd.DataFrame({
        "completed_time":
        np.arange(num_periods_running) * delta_t + start_time
    })
    true_values = {}
    for metric, df_m in data_df.groupby("metric_name"):

        # only keep the last data point for each arm
        df_m = (df_m.sort_values(["timestamp"],
                                 ascending=True).groupby("arm_name").tail(n=1))

        # get completed times from run metadata
        df_m["completed_time"] = df_m.apply(get_completed_time, axis=1)

        # for trials that completed at the same time, keep only the best
        df_m_g = df_m.groupby("completed_time")
        df_m = (df_m_g.min() if minimize else df_m_g.max()).reset_index()

        # take cumulative best wrt the completed time
        df_m = df_m.sort_index()
        df_m["mean"] = df_m["mean"].cummin(
        ) if minimize else df_m["mean"].cummax()
        df_b = pd.merge(iters_df, df_m, how="left", on="completed_time")

        # replace nans with Infs, which can be handled by `best_feasible_objective`
        true_values[metric] = df_b["mean"].fillna(
            np.inf if minimize else -np.inf)
    return best_feasible_objective(
        # pyre-fixme[6]: Expected `OptimizationConfig` for 1st param but got
        #  `Optional[ax.core.optimization_config.OptimizationConfig]`.
        optimization_config=experiment.optimization_config,
        values=true_values,
    )
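The time grid is simply `start_time + k * delta_t` for `k = 0, ..., floor((end_time - start_time) / delta_t)`; a quick worked example:

import numpy as np

start_time, end_time, delta_t = 0.0, 10.0, 3.0
num_periods_running = int((end_time - start_time) // delta_t + 1)  # 4
grid = np.arange(num_periods_running) * delta_t + start_time
# grid == array([0., 3., 6., 9.]); end_time lands on the grid only when
# (end_time - start_time) is an exact multiple of delta_t.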
Example #9
def get_hierarchical_search_space_experiment() -> Experiment:
    return Experiment(
        search_space=get_hierarchical_search_space(),
        optimization_config=get_optimization_config(),
    )
Example #10
    def experiment_from_sqa(self, experiment_sqa: SQAExperiment) -> Experiment:
        """Convert SQLAlchemy Experiment to Ax Experiment."""
        opt_config, tracking_metrics = self.opt_config_and_tracking_metrics_from_sqa(
            metrics_sqa=experiment_sqa.metrics)
        search_space = self.search_space_from_sqa(
            parameters_sqa=experiment_sqa.parameters,
            parameter_constraints_sqa=experiment_sqa.parameter_constraints,
        )
        if search_space is None:
            raise SQADecodeError(  # pragma: no cover
                "Experiment SearchSpace cannot be None.")
        runner = (self.runner_from_sqa(experiment_sqa.runner)
                  if experiment_sqa.runner else None)
        status_quo = (Arm(
            parameters=experiment_sqa.status_quo_parameters,
            name=experiment_sqa.status_quo_name,
        ) if experiment_sqa.status_quo_parameters is not None else None)

        if (experiment_sqa.properties is not None
                and experiment_sqa.properties.get("subclass")
                == "SimpleExperiment"):
            if opt_config is None:
                raise SQADecodeError(  # pragma: no cover
                    "SimpleExperiment must have an optimization config.")
            experiment = SimpleExperiment(
                name=experiment_sqa.name,
                search_space=search_space,
                objective_name=opt_config.objective.metric.name,
                minimize=opt_config.objective.minimize,
                outcome_constraints=opt_config.outcome_constraints,
                status_quo=status_quo,
            )
            experiment.description = experiment_sqa.description
            experiment.is_test = experiment_sqa.is_test
        else:
            experiment = Experiment(
                name=experiment_sqa.name,
                description=experiment_sqa.description,
                search_space=search_space,
                optimization_config=opt_config,
                tracking_metrics=tracking_metrics,
                runner=runner,
                status_quo=status_quo,
                is_test=experiment_sqa.is_test,
            )

        trials = [
            self.trial_from_sqa(trial_sqa=trial, experiment=experiment)
            for trial in experiment_sqa.trials
        ]

        data_by_trial = defaultdict(dict)
        for data_sqa in experiment_sqa.data:
            trial_index = data_sqa.trial_index
            timestamp = data_sqa.time_created
            data_by_trial[trial_index][timestamp] = self.data_from_sqa(
                data_sqa=data_sqa)
        data_by_trial = {
            trial_index: OrderedDict(sorted(data_by_timestamp.items()))
            for trial_index, data_by_timestamp in data_by_trial.items()
        }

        experiment._trials = {trial.index: trial for trial in trials}
        for trial in trials:
            for arm in trial.arms:
                experiment._arms_by_signature[arm.signature] = arm
        if experiment.status_quo is not None:
            sq_sig = experiment.status_quo.signature
            experiment._arms_by_signature[sq_sig] = experiment.status_quo
        experiment._time_created = experiment_sqa.time_created
        experiment._experiment_type = self.get_enum_name(
            value=experiment_sqa.experiment_type,
            enum=self.config.experiment_type_enum)
        experiment._data_by_trial = dict(data_by_trial)

        return experiment
Example #11
    def test_REMBOStrategy(self, mock_fit_gpytorch_model, mock_optimize_acqf):
        # Construct a high-D test experiment with multiple metrics
        hartmann_search_space = SearchSpace(
            parameters=[
                RangeParameter(
                    name=f"x{i}",
                    parameter_type=ParameterType.FLOAT,
                    lower=0.0,
                    upper=1.0,
                )
                for i in range(20)
            ]
        )

        exp = Experiment(
            name="test",
            search_space=hartmann_search_space,
            optimization_config=OptimizationConfig(
                objective=Objective(
                    metric=Hartmann6Metric(
                        name="hartmann6", param_names=[f"x{i}" for i in range(6)]
                    ),
                    minimize=True,
                ),
                outcome_constraints=[
                    OutcomeConstraint(
                        metric=L2NormMetric(
                            name="l2norm",
                            param_names=[f"x{i}" for i in range(6)],
                            noise_sd=0.2,
                        ),
                        op=ComparisonOp.LEQ,
                        bound=1.25,
                        relative=False,
                    )
                ],
            ),
            runner=SyntheticRunner(),
        )

        # Instantiate the strategy
        gs = REMBOStrategy(D=20, d=6, k=4, init_per_proj=4)

        # Check that arms and data are correctly segmented by projection
        exp.new_batch_trial(generator_run=gs.gen(experiment=exp, n=2)).run()
        self.assertEqual(len(gs.arms_by_proj[0]), 2)
        self.assertEqual(len(gs.arms_by_proj[1]), 0)

        exp.new_batch_trial(generator_run=gs.gen(experiment=exp, n=2)).run()

        self.assertEqual(len(gs.arms_by_proj[0]), 2)
        self.assertEqual(len(gs.arms_by_proj[1]), 2)

        # Iterate until the first projection fits a GP
        for _ in range(4):
            exp.new_batch_trial(generator_run=gs.gen(experiment=exp, n=2)).run()
            mock_fit_gpytorch_model.assert_not_called()

        self.assertEqual(len(gs.arms_by_proj[0]), 4)
        self.assertEqual(len(gs.arms_by_proj[1]), 4)
        self.assertEqual(len(gs.arms_by_proj[2]), 2)
        self.assertEqual(len(gs.arms_by_proj[3]), 2)

        # Keep iterating until GP is used for gen
        for i in range(4):
            # First two trials will go towards 3rd and 4th proj. getting enough
            if i < 1:  # data for GP.
                self.assertLess(len(gs.arms_by_proj[2]), 4)
            if i < 2:
                self.assertLess(len(gs.arms_by_proj[3]), 4)

            exp.new_batch_trial(generator_run=gs.gen(experiment=exp, n=2)).run()
            if i < 2:
                mock_fit_gpytorch_model.assert_not_called()
            else:
                # After all proj. have > 4 arms' worth of data, GP can be fit.
                self.assertFalse(any(len(x) < 4 for x in gs.arms_by_proj.values()))
                mock_fit_gpytorch_model.assert_called()

        self.assertTrue(len(gs.model_transitions) > 0)
        gs2 = gs.clone_reset()
        self.assertEqual(gs2.D, 20)
        self.assertEqual(gs2.d, 6)
Example #12
def exp_to_df(
    exp: Experiment,
    metrics: Optional[List[Metric]] = None,
    run_metadata_fields: Optional[List[str]] = None,
    trial_properties_fields: Optional[List[str]] = None,
    **kwargs: Any,
) -> pd.DataFrame:
    """Transforms an experiment to a DataFrame. Only supports Experiment and
    SimpleExperiment.

    Transforms an Experiment into a dataframe with rows keyed by trial_index
    and arm_name, metrics pivoted into one row.

    Args:
        exp: An Experiment that may have pending trials.
        metrics: Override list of metrics to return. Return all metrics if None.
        run_metadata_fields: fields to extract from trial.run_metadata for trial
            in experiment.trials. If there are multiple arms per trial, these
            fields will be replicated across the arms of a trial.
        trial_properties_fields: fields to extract from trial._properties for trial
            in experiment.trials. If there are multiple arms per trial, these fields
            will be replicated across the arms of a trial. Output columns names will be
            prepended with "trial_properties_".

        **kwargs: Custom named arguments, useful for passing complex
            objects from call-site to the `fetch_data` callback.

    Returns:
        DataFrame: A dataframe of inputs, metadata and metrics by trial and arm. If
        no trials are available, returns an empty dataframe. If no metric outputs are
        available, returns a dataframe of inputs and metadata.
    """
    def prep_return(df: pd.DataFrame, drop_col: str,
                    sort_by: List[str]) -> pd.DataFrame:
        return not_none(
            not_none(df.drop(drop_col, axis=1)).sort_values(sort_by))

    def merge_trials_dict_with_df(df: pd.DataFrame, trials_dict: Dict[int,
                                                                      Any],
                                  column_name: str) -> None:
        """Add a column ``column_name`` to a DataFrame ``df`` containing a column
        ``trial_index``. Each value of the new column is given by the element of
        ``trials_dict`` indexed by ``trial_index``.

        Args:
            df: Pandas DataFrame with column ``trial_index``, to be appended with a new
                column.
            trials_dict: Dict mapping each ``trial_index`` to a value. The new column of
                df will be populated with the value corresponding with the
                ``trial_index`` of each row.
            column_name: Name of the column to be appended to ``df``.
        """

        if "trial_index" not in df.columns:
            raise ValueError("df must have trial_index column")
        if any(trials_dict.values()):  # field present for any trial
            if not all(trials_dict.values()):  # not present for all trials
                logger.warning(
                    f"Column {column_name} missing for some trials. "
                    "Filling with None when missing.")
            df[column_name] = [
                trials_dict[trial_index] for trial_index in df.trial_index
            ]
        else:
            logger.warning(f"Column {column_name} missing for all trials. "
                           "Not appending column.")

    def get_generation_method_str(trial: BaseTrial) -> str:
        generation_methods = {
            not_none(generator_run._model_key)
            for generator_run in trial.generator_runs
            if generator_run._model_key is not None
        }
        # add "Manual" if any generator_runs are manual
        if any(generator_run.generator_run_type == GeneratorRunType.MANUAL.name
               for generator_run in trial.generator_runs):
            generation_methods.add("Manual")
        return ", ".join(
            generation_methods) if generation_methods else "Unknown"

    # Accept Experiment and SimpleExperiment
    if isinstance(exp, MultiTypeExperiment):
        raise ValueError(
            "Cannot transform MultiTypeExperiments to DataFrames.")

    key_components = ["trial_index", "arm_name"]

    # Get each trial-arm with parameters
    arms_df = pd.DataFrame()
    for trial_index, trial in exp.trials.items():
        for arm in trial.arms:
            # DataFrame.append was removed in pandas 2.0; concat is equivalent here.
            arms_df = pd.concat(
                [arms_df, pd.DataFrame([{
                    "arm_name": arm.name,
                    "trial_index": trial_index,
                    **arm.parameters
                }])],
                ignore_index=True,
            )

    # Fetch results; in case arms_df is empty, return empty results (legacy behavior)
    results = exp.fetch_data(metrics, **kwargs).df
    if len(arms_df.index) == 0:
        if len(results.index) != 0:
            raise ValueError(
                "exp.fetch_data().df returned more rows than there are experimental "
                "arms. This is an inconsistent experimental state. Please report to "
                "Ax support.")
        return results

    # Create key column from key_components
    arms_df["trial_index"] = arms_df["trial_index"].astype(int)
    key_col = "-".join(key_components)
    key_vals = arms_df[key_components[0]].astype("str") + arms_df[
        key_components[1]].astype("str")
    arms_df[key_col] = key_vals

    # Add trial status
    trials = exp.trials.items()
    trial_to_status = {index: trial.status.name for index, trial in trials}
    merge_trials_dict_with_df(df=arms_df,
                              trials_dict=trial_to_status,
                              column_name="trial_status")

    # Add generation_method, accounting for the generic case that generator_runs is of
    # arbitrary length. Repeated methods within a trial are condensed via `set` and an
    # empty set will yield "Unknown" as the method.
    trial_to_generation_method = {
        trial_index: get_generation_method_str(trial)
        for trial_index, trial in trials
    }

    merge_trials_dict_with_df(
        df=arms_df,
        trials_dict=trial_to_generation_method,
        column_name="generation_method",
    )

    # Add any trial properties fields to arms_df
    if trial_properties_fields is not None:
        # add trial._properties fields
        for field in trial_properties_fields:
            trial_to_properties_field = {
                trial_index: (trial._properties[field]
                              if field in trial._properties else None)
                for trial_index, trial in trials
            }
            merge_trials_dict_with_df(
                df=arms_df,
                trials_dict=trial_to_properties_field,
                column_name="trial_properties_" + field,
            )

    # Add any run_metadata fields to arms_df
    if run_metadata_fields is not None:
        # add run_metadata fields
        for field in run_metadata_fields:
            trial_to_metadata_field = {
                trial_index: (trial.run_metadata[field]
                              if field in trial.run_metadata else None)
                for trial_index, trial in trials
            }
            merge_trials_dict_with_df(
                df=arms_df,
                trials_dict=trial_to_metadata_field,
                column_name=field,
            )

    if len(results.index) == 0:
        logger.info(
            f"No results present for the specified metrics `{metrics}`. "
            "Returning arm parameters and metadata only.")
        exp_df = arms_df
    elif not all(col in results.columns for col in key_components):
        logger.warning(
            f"At least one of key columns `{key_components}` not present in results df "
            f"`{results}`. Returning arm parameters and metadata only.")
        exp_df = arms_df
    else:
        # prepare results for merge
        key_vals = results[key_components[0]].astype("str") + results[
            key_components[1]].astype("str")
        results[key_col] = key_vals
        metric_vals = results.pivot(index=key_col,
                                    columns="metric_name",
                                    values="mean").reset_index()

        # dedupe results by key_components
        metadata = results[key_components + [key_col]].drop_duplicates()
        metrics_df = pd.merge(metric_vals, metadata, on=key_col)

        # merge and return
        exp_df = pd.merge(metrics_df,
                          arms_df,
                          on=key_components + [key_col],
                          how="outer")
    return prep_return(df=exp_df, drop_col=key_col, sort_by=["arm_name"])
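To illustrate `merge_trials_dict_with_df` in isolation (pretending for a moment that the helper were module-level rather than nested inside `exp_to_df`):

import pandas as pd

df = pd.DataFrame({"trial_index": [0, 0, 1],
                   "arm_name": ["0_0", "0_1", "1_0"]})
# Each row receives the value keyed by its trial_index.
merge_trials_dict_with_df(df=df,
                          trials_dict={0: "COMPLETED", 1: "RUNNING"},
                          column_name="trial_status")
# df["trial_status"] is now ["COMPLETED", "COMPLETED", "RUNNING"]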
Example #13
def get_standard_plots(
    experiment: Experiment,
    model: Optional[ModelBridge],
    model_transitions: Optional[List[int]] = None,
) -> List[go.Figure]:
    """Extract standard plots for single-objective optimization.

    Extracts a list of plots from an ``Experiment`` and ``ModelBridge`` of general
    interest to an Ax user. Currently not supported are
    - TODO: multi-objective optimization
    - TODO: ChoiceParameter plots

    Args:
        - experiment: The ``Experiment`` from which to obtain standard plots.
        - model: The ``ModelBridge`` used to suggest trial parameters.
        - model_transitions: The arm numbers at which shifts in generation_strategy
            occur.

    Returns:
        - a plot of objective value vs. trial index, to show experiment progression
        - a plot of objective value vs. range parameter values, only included if the
          model associated with generation_strategy can create predictions. This
          consists of:

            - a plot_slice plot if the search space contains one range parameter
            - an interact_contour plot if the search space contains multiple
              range parameters

    """

    objective = not_none(experiment.optimization_config).objective
    if isinstance(objective, MultiObjective):
        logger.warning(
            "get_standard_plots does not currently support MultiObjective "
            "optimization experiments. Returning an empty list.")
        return []
    if isinstance(objective, ScalarizedObjective):
        logger.warning(
            "get_standard_plots does not currently support ScalarizedObjective "
            "optimization experiments. Returning an empty list.")
        return []

    if experiment.fetch_data().df.empty:
        logger.info(
            f"Experiment {experiment} does not yet have data, nothing to plot."
        )
        return []

    output_plot_list = []
    output_plot_list.append(
        _get_objective_trace_plot(
            experiment=experiment,
            metric_name=not_none(
                experiment.optimization_config).objective.metric.name,
            model_transitions=model_transitions
            if model_transitions is not None else [],
            optimization_direction=("minimize" if not_none(
                experiment.optimization_config).objective.minimize else
                                    "maximize"),
        ))

    # Objective vs. parameter plot requires a `Model`, so add it only if model
    # is already available. In cases where initially custom trials are attached,
    # model might not yet be set on the generation strategy.
    if model:
        # TODO: Check if model can predict in favor of try/catch.
        try:
            output_plot_list.append(
                _get_objective_v_param_plot(
                    search_space=experiment.search_space,
                    model=model,
                    metric_name=not_none(
                        experiment.optimization_config).objective.metric.name,
                    trials=experiment.trials,
                ))
            output_plot_list.append(_get_cross_validation_plot(model))
        except NotImplementedError:
            # Model does not implement `predict` method.
            pass

    return [plot for plot in output_plot_list if plot is not None]
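A hedged usage sketch (`model_bridge` is a placeholder `ModelBridge`):

plots = get_standard_plots(experiment=experiment, model=model_bridge)
for figure in plots:
    figure.show()  # each entry is a plotly.graph_objects.Figure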
Example #14
def get_experiment_for_value() -> Experiment:
    return Experiment(get_search_space_for_value(), "test")
Example #15
def get_standard_plots(
    experiment: Experiment, generation_strategy: GenerationStrategy
) -> List[go.Figure]:
    """Extract standard plots for single-objective optimization.

    Extracts a list of plots from an Experiment and GenerationStrategy of general
    interest to an Ax user. Currently not supported are
    - TODO: multi-objective optimization
    - TODO: ChoiceParameter plots

    Args:
        - experiment: the Experiment from which to obtain standard plots.
        - generation_strategy: the GenerationStrategy used to suggest trial parameters
          in experiment

    Returns:
        - a plot of objective value vs. trial index, to show experiment progression
        - a plot of objective value vs. range parameter values, only included if the
          model associated with generation_strategy can create predictions. This
          consists of:

            - a plot_slice plot if the search space contains one range parameter
            - an interact_contour plot if the search space contains multiple
              range parameters

    """

    objective = not_none(experiment.optimization_config).objective
    if isinstance(objective, MultiObjective):
        logger.warning(
            "get_standard_plots does not currently support MultiObjective "
            "optimization experiments. Returning an empty list."
        )
        return []
    if isinstance(objective, ScalarizedObjective):
        logger.warning(
            "get_standard_plots does not currently support ScalarizedObjective "
            "optimization experiments. Returning an empty list."
        )
        return []

    if experiment.fetch_data().df.empty:
        logger.info(f"Experiment {experiment} does not yet have data, nothing to plot.")
        return []

    output_plot_list = []
    output_plot_list.append(
        _get_objective_trace_plot(
            experiment=experiment,
            metric_name=not_none(experiment.optimization_config).objective.metric.name,
            model_transitions=generation_strategy.model_transitions,
            optimization_direction=(
                "minimize"
                if not_none(experiment.optimization_config).objective.minimize
                else "maximize"
            ),
        )
    )

    try:
        output_plot_list.append(
            _get_objective_v_param_plot(
                search_space=experiment.search_space,
                model=not_none(generation_strategy.model),
                metric_name=not_none(
                    experiment.optimization_config
                ).objective.metric.name,
                trials=experiment.trials,
            )
        )
    except NotImplementedError:
        # Model does not implement `predict` method.
        pass

    return [plot for plot in output_plot_list if plot is not None]
Example #16
def get_observed_pareto_frontiers(
    experiment: Experiment,
    data: Optional[Data] = None,
    rel: bool = True,
) -> List[ParetoFrontierResults]:
    """
    Find all Pareto points from an experiment.

    Uses only values as observed in the data; no modeling is involved. Makes no
    assumption about the search space or types of parameters. If "data" is provided,
    it is used; otherwise, all data attached to the experiment is used.

    Uses all arms present in data; does not filter according to experiment
    search space.

    Assumes experiment has a multiobjective optimization config from which the
    objectives and outcome constraints will be extracted.

    Will generate a ParetoFrontierResults for every pair of metrics in the experiment's
    multiobjective optimization config.
    """
    if data is None:
        data = experiment.fetch_data()
    if experiment.optimization_config is None:
        raise ValueError("Experiment must have an optimization config")
    mb = get_tensor_converter_model(experiment=experiment, data=data)
    pareto_observations = observed_pareto_frontier(modelbridge=mb)
    # Convert to ParetoFrontierResults
    metric_names = [
        metric.name for metric in
        experiment.optimization_config.objective.metrics  # pyre-ignore
    ]
    pfr_means = {name: [] for name in metric_names}
    pfr_sems = {name: [] for name in metric_names}

    for obs in pareto_observations:
        for i, name in enumerate(obs.data.metric_names):
            pfr_means[name].append(obs.data.means[i])
            pfr_sems[name].append(np.sqrt(obs.data.covariance[i, i]))

    # Relativize as needed
    if rel and experiment.status_quo is not None:
        # Get status quo values
        sq_df = data.df[data.df["arm_name"] ==
                        experiment.status_quo.name  # pyre-ignore
                        ]
        sq_df = sq_df.to_dict(orient="list")  # pyre-ignore
        sq_means = {}
        sq_sems = {}
        for i, metric in enumerate(sq_df["metric_name"]):
            sq_means[metric] = sq_df["mean"][i]
            sq_sems[metric] = sq_df["sem"][i]
        # Relativize
        for name in metric_names:
            if np.isnan(sq_sems[name]) or np.isnan(pfr_sems[name]).any():
                # Just relativize means
                pfr_means[name] = [(mu / sq_means[name] - 1) * 100
                                   for mu in pfr_means[name]]
            else:
                # Use delta method
                pfr_means[name], pfr_sems[name] = relativize(
                    means_t=pfr_means[name],
                    sems_t=pfr_sems[name],
                    mean_c=sq_means[name],
                    sem_c=sq_sems[name],
                    as_percent=True,
                )
        absolute_metrics = []
    else:
        absolute_metrics = metric_names

    objective_thresholds = {}
    if experiment.optimization_config.objective_thresholds is not None:  # pyre-ignore
        for objth in experiment.optimization_config.objective_thresholds:
            is_rel = objth.metric.name not in absolute_metrics
            if objth.relative != is_rel:
                raise ValueError(
                    f"Objective threshold for {objth.metric.name} has "
                    f"rel={objth.relative} but was specified here as rel={is_rel}"
                )
            objective_thresholds[objth.metric.name] = objth.bound

    # Construct ParetoFrontResults for each pair
    pfr_list = []
    param_dicts = [obs.features.parameters for obs in pareto_observations]
    arm_names = [obs.arm_name for obs in pareto_observations]

    for metric_a, metric_b in combinations(metric_names, 2):
        pfr_list.append(
            ParetoFrontierResults(
                param_dicts=param_dicts,
                means=pfr_means,
                sems=pfr_sems,
                primary_metric=metric_a,
                secondary_metric=metric_b,
                absolute_metrics=absolute_metrics,
                objective_thresholds=objective_thresholds,
                arm_names=arm_names,
            ))
    return pfr_list
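The means-only fallback above is a plain percent change against the status quo, `(mu / sq_mean - 1) * 100`; a quick worked example:

sq_mean = 4.0
pfr_means = [5.0, 3.0, 6.0]
rel = [(mu / sq_mean - 1) * 100 for mu in pfr_means]
# rel == [25.0, -25.0, 50.0]  (percent change vs. the status quo)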
Example #17
def exp_to_df(
    exp: Experiment,
    metrics: Optional[List[Metric]] = None,
    key_components: Optional[List[str]] = None,
    run_metadata_fields: Optional[List[str]] = None,
    **kwargs: Any,
) -> pd.DataFrame:
    """Transforms an experiment to a DataFrame. Only supports Experiment and
    SimpleExperiment.

    Transforms an Experiment into a dataframe with rows keyed by trial_index
    and arm_name, metrics pivoted into one row.

    Args:
        exp: An Experiment that may have pending trials.
        metrics: Override list of metrics to return. Return all metrics if None.
        key_components: fields that combine to make a unique key corresponding
            to rows, similar to the list of fields passed to a GROUP BY.
            Defaults to ['arm_name', 'trial_index'].
        run_metadata_fields: fields to extract from trial.run_metadata for trial
            in experiment.trials. If there are multiple arms per trial, these
            fields will be replicated across the arms of a trial.
        **kwargs: Custom named arguments, useful for passing complex
            objects from call-site to the `fetch_data` callback.

    Returns:
        DataFrame: A dataframe of inputs and metrics by trial and arm.
    """

    def prep_return(
        df: pd.DataFrame, drop_col: str, sort_by: List[str]
    ) -> pd.DataFrame:
        return not_none(not_none(df.drop(drop_col, axis=1)).sort_values(sort_by))

    key_components = key_components or ["trial_index", "arm_name"]

    # Accept Experiment and SimpleExperiment
    if isinstance(exp, MultiTypeExperiment):
        raise ValueError("Cannot transform MultiTypeExperiments to DataFrames.")

    results = exp.fetch_data(metrics, **kwargs).df
    if len(results.index) == 0:  # Handle empty case
        return results

    # create key column from key_components
    key_col = "-".join(key_components)
    key_vals = results[key_components[0]].astype("str")
    for key in key_components[1:]:
        key_vals = key_vals + results[key].astype("str")
    results[key_col] = key_vals

    # pivot dataframe from long to wide
    metric_vals = results.pivot(
        index=key_col, columns="metric_name", values="mean"
    ).reset_index()

    # dedupe results by key_components
    metadata = results[key_components + [key_col]].drop_duplicates()
    metric_and_metadata = pd.merge(metric_vals, metadata, on=key_col)

    # get params of each arm and merge with deduped results
    arm_names_and_params = pd.DataFrame(
        [{"arm_name": name, **arm.parameters} for name, arm in exp.arms_by_name.items()]
    )
    exp_df = pd.merge(metric_and_metadata, arm_names_and_params, on="arm_name")

    # add trial status
    trials = exp.trials.items()
    trial_to_status = {index: trial.status.name for index, trial in trials}
    exp_df["trial_status"] = [trial_to_status[key] for key in exp_df.trial_index]

    # if no run_metadata fields are requested, return exp_df so far
    if run_metadata_fields is None:
        return prep_return(df=exp_df, drop_col=key_col, sort_by=key_components)
    if not isinstance(run_metadata_fields, list):
        raise ValueError("run_metadata_fields must be List[str] or None.")

    # add additional run_metadata fields
    for field in run_metadata_fields:
        trial_to_metadata_field = {
            index: (trial.run_metadata[field] if field in trial.run_metadata else None)
            for index, trial in trials
        }
        if any(trial_to_metadata_field.values()):  # field present for any trial
            if not all(trial_to_metadata_field.values()):  # not present for all trials
                logger.warning(
                    f"Field {field} missing for some trials' run_metadata. "
                    "Returning None when missing."
                )
            exp_df[field] = [trial_to_metadata_field[key] for key in exp_df.trial_index]
        else:
            logger.warning(
                f"Field {field} missing for all trials' run_metadata. "
                "Not appending column."
            )
    return prep_return(df=exp_df, drop_col=key_col, sort_by=key_components)
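A minimal usage sketch for this helper; `exp` is a hypothetical, already-populated Experiment, and "duration" an assumed key in each trial's run_metadata:

# Sketch only: `exp` and the "duration" metadata key are assumptions.
df = exp_to_df(
    exp,
    run_metadata_fields=["duration"],  # replicated across the arms of each trial
)
# One row per (trial_index, arm_name), with metric means pivoted into columns.
print(df[["trial_index", "arm_name", "trial_status", "duration"]].head())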
Example #18
    def testStatusQuoSetter(self):
        sq_parameters = self.experiment.status_quo.parameters
        self.experiment.status_quo = None
        self.assertIsNone(self.experiment.status_quo)

        # Verify normal update
        sq_parameters["w"] = 3.5
        self.experiment.status_quo = Arm(sq_parameters)
        self.assertEqual(self.experiment.status_quo.parameters["w"], 3.5)
        self.assertEqual(self.experiment.status_quo.name, "status_quo")

        # Verify all None values
        self.experiment.status_quo = Arm({n: None for n in sq_parameters.keys()})
        self.assertIsNone(self.experiment.status_quo.parameters["w"])

        # Try extra param
        sq_parameters["a"] = 4
        with self.assertRaises(ValueError):
            self.experiment.status_quo = Arm(sq_parameters)

        # Try wrong type
        sq_parameters.pop("a")
        sq_parameters["w"] = "hello"
        with self.assertRaises(ValueError):
            self.experiment.status_quo = Arm(sq_parameters)

        # Verify arms_by_signature only contains status_quo
        self.assertEqual(len(self.experiment.arms_by_signature), 1)

        # Change status quo, verify still just 1 arm
        sq_parameters["w"] = 3.6
        self.experiment.status_quo = Arm(sq_parameters)
        self.assertEqual(len(self.experiment.arms_by_signature), 1)

        # Make a batch, add status quo to it, then change exp status quo, verify 2 arms
        batch = self.experiment.new_batch_trial()
        batch.set_status_quo_with_weight(self.experiment.status_quo, 1)
        sq_parameters["w"] = 3.7
        self.experiment.status_quo = Arm(sq_parameters)
        self.assertEqual(len(self.experiment.arms_by_signature), 2)
        self.assertEqual(self.experiment.status_quo.name, "status_quo_e0")

        # Try missing param
        sq_parameters.pop("w")
        with self.assertRaises(ValueError):
            self.experiment.status_quo = Arm(sq_parameters)

        # Actually name the status quo.
        exp = Experiment(
            name="test3",
            search_space=get_branin_search_space(),
            tracking_metrics=[BraninMetric(name="b", param_names=["x1", "x2"])],
            runner=SyntheticRunner(),
        )
        batch = exp.new_batch_trial()
        arms = get_branin_arms(n=1, seed=0)
        batch.add_arms_and_weights(arms=arms)
        self.assertIsNone(exp.status_quo)
        exp.status_quo = arms[0]
        self.assertEqual(exp.status_quo.name, "0_0")

        # Try setting sq to existing arm with different name
        with self.assertRaises(ValueError):
            exp.status_quo = Arm(arms[0].parameters, name="new_name")
Example #19
    def testEmptyMetrics(self):
        empty_experiment = Experiment(name="test_experiment",
                                      search_space=get_search_space())
        self.assertEqual(empty_experiment.num_trials, 0)
        with self.assertRaises(ValueError):
            empty_experiment.fetch_data()
        batch = empty_experiment.new_batch_trial()
        batch.mark_running(no_runner_required=True)
        self.assertEqual(empty_experiment.num_trials, 1)
        with self.assertRaises(ValueError):
            batch.fetch_data()
        empty_experiment.add_tracking_metric(Metric(name="ax_test_metric"))
        self.assertTrue(empty_experiment.fetch_data().df.empty)
        empty_experiment.attach_data(get_data())
        batch.mark_completed()
        self.assertFalse(empty_experiment.fetch_data().df.empty)
Example #20
    def testDerelativizeTransform(self, mock_predict, mock_fit,
                                  mock_observations_from_data):
        t = Derelativize(search_space=None,
                         observation_features=None,
                         observation_data=None)

        # ModelBridge with in-design status quo
        search_space = SearchSpace(parameters=[
            RangeParameter("x", ParameterType.FLOAT, 0, 20),
            RangeParameter("y", ParameterType.FLOAT, 0, 20),
        ])
        g = ModelBridge(
            search_space=search_space,
            model=None,
            transforms=[],
            experiment=Experiment(search_space, "test"),
            data=Data(),
            status_quo_name="1_1",
        )

        # Test with no relative constraints
        objective = Objective(Metric("c"))
        oc = OptimizationConfig(
            objective=objective,
            outcome_constraints=[
                OutcomeConstraint(Metric("a"),
                                  ComparisonOp.LEQ,
                                  bound=2,
                                  relative=False)
            ],
        )
        oc2 = t.transform_optimization_config(oc, g, None)
        self.assertTrue(oc == oc2)

        # Test with relative constraint, in-design status quo
        oc = OptimizationConfig(
            objective=objective,
            outcome_constraints=[
                OutcomeConstraint(Metric("a"),
                                  ComparisonOp.LEQ,
                                  bound=2,
                                  relative=False),
                OutcomeConstraint(Metric("b"),
                                  ComparisonOp.LEQ,
                                  bound=-10,
                                  relative=True),
            ],
        )
        oc = t.transform_optimization_config(oc, g, None)
        self.assertTrue(oc.outcome_constraints == [
            OutcomeConstraint(
                Metric("a"), ComparisonOp.LEQ, bound=2, relative=False),
            OutcomeConstraint(
                Metric("b"), ComparisonOp.LEQ, bound=4.5, relative=False),
        ])
        obsf = mock_predict.mock_calls[0][1][1][0]
        obsf2 = ObservationFeatures(parameters={"x": 2.0, "y": 10.0})
        self.assertTrue(obsf == obsf2)

        # Test with relative constraint, out-of-design status quo
        mock_predict.side_effect = Exception()
        g = ModelBridge(
            search_space=search_space,
            model=None,
            transforms=[],
            experiment=Experiment(search_space, "test"),
            data=Data(),
            status_quo_name="1_2",
        )
        oc = OptimizationConfig(
            objective=objective,
            outcome_constraints=[
                OutcomeConstraint(Metric("a"),
                                  ComparisonOp.LEQ,
                                  bound=2,
                                  relative=False),
                OutcomeConstraint(Metric("b"),
                                  ComparisonOp.LEQ,
                                  bound=-10,
                                  relative=True),
            ],
        )
        oc = t.transform_optimization_config(oc, g, None)
        self.assertTrue(oc.outcome_constraints == [
            OutcomeConstraint(
                Metric("a"), ComparisonOp.LEQ, bound=2, relative=False),
            OutcomeConstraint(
                Metric("b"), ComparisonOp.LEQ, bound=3.6, relative=False),
        ])
        self.assertEqual(mock_predict.call_count, 2)

        # Raises error if predict fails with in-design status quo
        g = ModelBridge(search_space, None, [], status_quo_name="1_1")
        oc = OptimizationConfig(
            objective=objective,
            outcome_constraints=[
                OutcomeConstraint(Metric("a"),
                                  ComparisonOp.LEQ,
                                  bound=2,
                                  relative=False),
                OutcomeConstraint(Metric("b"),
                                  ComparisonOp.LEQ,
                                  bound=-10,
                                  relative=True),
            ],
        )
        with self.assertRaises(Exception):
            oc = t.transform_optimization_config(oc, g, None)

        # Raises error with relative constraint, no status quo
        exp = Experiment(search_space, "name")
        g = ModelBridge(search_space, None, [], exp)
        with self.assertRaises(ValueError):
            oc = t.transform_optimization_config(oc, g, None)

        # Raises error with relative constraint, no modelbridge
        with self.assertRaises(ValueError):
            oc = t.transform_optimization_config(oc, None, None)
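The expected bounds asserted above are consistent with the usual derelativization arithmetic: a relative LEQ bound of b percent becomes an absolute bound of status_quo_value * (1 + b / 100). Assuming the mocked prediction yields a status-quo value of 5.0 for metric "b" (in-design case) and the raw observation yields 4.0 (out-of-design case), a quick check:

# Hedged sketch; the 5.0 and 4.0 status-quo values are inferred from the
# asserted bounds above, not read from the mocks themselves.
def derelativize_bound(status_quo_value: float, relative_bound_pct: float) -> float:
    # A relative bound of b% becomes absolute via sq * (1 + b / 100).
    return status_quo_value * (1.0 + relative_bound_pct / 100.0)

assert derelativize_bound(5.0, -10) == 4.5  # in-design status quo
assert derelativize_bound(4.0, -10) == 3.6  # out-of-design status quo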
Example #21
def get_standard_plots(
    experiment: Experiment,
    model: Optional[ModelBridge],
    data: Optional[Data] = None,
    model_transitions: Optional[List[int]] = None,
    true_objective_metric_name: Optional[str] = None,
) -> List[go.Figure]:
    """Extract standard plots for single-objective optimization.

    Extracts a list of plots from an ``Experiment`` and ``ModelBridge`` of general
    interest to an Ax user. Currently not supported are
    - TODO: multi-objective optimization
    - TODO: ChoiceParameter plots

    Args:
        - experiment: The ``Experiment`` from which to obtain standard plots.
        - model: The ``ModelBridge`` used to suggest trial parameters.
        - data: If specified, data to which to fit the model before generating plots.
        - model_transitions: The arm numbers at which shifts in generation_strategy
            occur.
        - true_objective_metric_name: Name of a metric to treat as the true
            objective (plotted against the optimization objective when model
            predictions are available), if any.

    Returns:
        - a plot of objective value vs. trial index, to show experiment progression
        - a plot of objective value vs. range parameter values, only included if the
          model associated with generation_strategy can create predictions. This
          consists of:

            - a plot_slice plot if the search space contains one range parameter
            - an interact_contour plot if the search space contains multiple
              range parameters

    """
    if (true_objective_metric_name is not None
            and true_objective_metric_name not in experiment.metrics.keys()):
        raise ValueError(
            f"true_objective_metric_name='{true_objective_metric_name}' is not present "
            f"in experiment.metrics={experiment.metrics}. Please add a valid "
            "true_objective_metric_name or remove the optional parameter to get "
            "standard plots.")

    objective = not_none(experiment.optimization_config).objective
    if isinstance(objective, ScalarizedObjective):
        logger.warning(
            "get_standard_plots does not currently support ScalarizedObjective "
            "optimization experiments. Returning an empty list.")
        return []

    if data is None:
        data = experiment.fetch_data()

    if data.df.empty:
        logger.info(
            f"Experiment {experiment} does not yet have data, nothing to plot."
        )
        return []

    output_plot_list = []
    output_plot_list.extend(
        _get_objective_trace_plot(
            experiment=experiment,
            data=data,
            model_transitions=model_transitions
            if model_transitions is not None else [],
            true_objective_metric_name=true_objective_metric_name,
        ))

    # Objective vs. parameter plot requires a `Model`, so add it only if a model
    # is already available. In cases where custom trials are attached initially,
    # the model might not yet be set on the generation strategy.
    if model:
        # TODO: Check if model can predict in favor of try/catch.
        try:
            if true_objective_metric_name is not None:
                output_plot_list.append(
                    _objective_vs_true_objective_scatter(
                        model=model,
                        objective_metric_name=objective.metric_names[0],
                        true_objective_metric_name=true_objective_metric_name,
                    ))
            output_plot_list.extend(
                _get_objective_v_param_plots(
                    experiment=experiment,
                    model=model,
                ))
            output_plot_list.extend(_get_cross_validation_plots(model=model))
            feature_importance_plot = plot_feature_importance_by_feature_plotly(
                model=model,
                relative=False,
                caption=FEATURE_IMPORTANCE_CAPTION)
            feature_importance_plot.layout.title = "[ADVANCED] " + str(
                # pyre-fixme[16]: go.Figure has no attribute `layout`
                feature_importance_plot.layout.title.text)
            output_plot_list.append(feature_importance_plot)
            output_plot_list.append(
                interact_fitted_plotly(model=model, rel=False))
        except NotImplementedError:
            # Model does not implement `predict` method.
            pass

    return [plot for plot in output_plot_list if plot is not None]
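A usage sketch, assuming `experiment` and `generation_strategy` are an existing Ax experiment and its generation strategy with data already attached:

# Sketch only: `experiment` and `generation_strategy` are assumed to exist.
plots = get_standard_plots(
    experiment=experiment,
    model=generation_strategy.model,  # may be None before the first model fit
)
for fig in plots:
    fig.show()  # each entry is a plotly Figure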
Example #22
def make_experiment(
    parameters: List[TParameterRepresentation],
    name: Optional[str] = None,
    parameter_constraints: Optional[List[str]] = None,
    outcome_constraints: Optional[List[str]] = None,
    status_quo: Optional[TParameterization] = None,
    experiment_type: Optional[str] = None,
    tracking_metric_names: Optional[List[str]] = None,
    # Single-objective optimization arguments:
    objective_name: Optional[str] = None,
    minimize: bool = False,
    # Multi-objective optimization arguments:
    objectives: Optional[Dict[str, str]] = None,
    objective_thresholds: Optional[List[str]] = None,
    support_intermediate_data: Optional[bool] = False,
    immutable_search_space_and_opt_config: Optional[bool] = True,
) -> Experiment:
    """Instantiation wrapper that allows for Ax `Experiment` creation
    without importing or instantiating any Ax classes.

    Args:
        parameters: List of dictionaries representing parameters in the
            experiment search space.
            Required elements in the dictionaries are:
            1. "name" (name of parameter, string),
            2. "type" (type of parameter: "range", "fixed", or "choice", string),
            and one of the following:
            3a. "bounds" for range parameters (list of two values, lower bound
            first),
            3b. "values" for choice parameters (list of values), or
            3c. "value" for fixed parameters (single value).
            Optional elements are:
            1. "log_scale" (for float-valued range parameters, bool),
            2. "value_type" (to specify type that values of this parameter should
            take; expects "float", "int", "bool" or "str"),
            3. "is_fidelity" (bool) and "target_value" (float) for fidelity
            parameters,
            4. "is_ordered" (bool) for choice parameters,
            5. "is_task" (bool) for task parameters, and
            6. "digits" (int) for float-valued range parameters.
        name: Name of the experiment to be created.
        parameter_constraints: List of string representation of parameter
            constraints, such as "x3 >= x4" or "-x3 + 2*x4 - 3.5*x5 >= 2". For
            the latter constraints, any number of arguments is accepted, and
            acceptable operators are "<=" and ">=".
        outcome_constraints: List of string representations of outcome
            constraints of the form "metric_name >= bound", e.g. "m1 <= 3".
        status_quo: Parameterization of the current state of the system.
            If set, this will be added to each trial to be evaluated alongside
            test configurations.
        experiment_type: String indicating type of the experiment (e.g. name of
            a product in which it is used), if any.
        tracking_metric_names: Names of additional tracking metrics not used for
            optimization.
        objective_name: Name of the metric used as objective in this experiment,
            if experiment is single-objective optimization.
        minimize: Whether this experiment represents a minimization problem, if
            experiment is a single-objective optimization.
        objectives: Mapping from an objective name to "minimize" or "maximize"
            representing the direction for that objective. Used only for
            multi-objective optimization experiments.
        objective_thresholds: A list of objective threshold constraints for multi-
            objective optimization, in the same string format as `outcome_constraints`
            argument.
        support_intermediate_data: Whether trials may report intermediate metric
            results while still running.
        immutable_search_space_and_opt_config: Whether the search space and
            optimization config are frozen on this experiment after creation
            (i.e., can no longer be updated). Defaults to True. When True, we
            won't store or load copies of the search space and optimization
            config on each generator run, which improves storage performance.
    """
    if objective_name is not None and (objectives is not None
                                       or objective_thresholds is not None):
        raise UnsupportedError(
            "Ambiguous objective definition: for single-objective optimization "
            "`objective_name` and `minimize` arguments expected. For multi-objective "
            "optimization `objectives` and `objective_thresholds` arguments expected."
        )

    status_quo_arm = None if status_quo is None else Arm(parameters=status_quo)

    # TODO(jej): Needs to be decided per-metric when supporting heterogeneous data.
    metric_cls = MapMetric if support_intermediate_data else Metric
    if objectives is None:
        optimization_config = OptimizationConfig(
            objective=Objective(
                metric=metric_cls(
                    name=objective_name or DEFAULT_OBJECTIVE_NAME,
                    lower_is_better=minimize,
                ),
                minimize=minimize,
            ),
            outcome_constraints=make_outcome_constraints(
                outcome_constraints or [], status_quo_arm is not None),
        )
    else:
        optimization_config = make_optimization_config(
            objectives,
            objective_thresholds or [],
            outcome_constraints or [],
            status_quo_arm is not None,
        )

    tracking_metrics = (None if tracking_metric_names is None else [
        Metric(name=metric_name) for metric_name in tracking_metric_names
    ])

    default_data_type = (DataType.MAP_DATA
                         if support_intermediate_data else DataType.DATA)

    immutable_ss_and_oc = immutable_search_space_and_opt_config
    properties = ({} if not immutable_search_space_and_opt_config else {
        Keys.IMMUTABLE_SEARCH_SPACE_AND_OPT_CONF.value:
        immutable_ss_and_oc
    })

    return Experiment(
        name=name,
        search_space=make_search_space(parameters, parameter_constraints
                                       or []),
        optimization_config=optimization_config,
        status_quo=status_quo_arm,
        experiment_type=experiment_type,
        tracking_metrics=tracking_metrics,
        default_data_type=default_data_type,
        properties=properties,
    )
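A sketch of the dictionary-based parameter format described in the docstring; all names, bounds, and constraints below are illustrative, not canonical:

# Illustrative values only; swap in your own parameter names and metrics.
experiment = make_experiment(
    name="demo",
    parameters=[
        {"name": "x1", "type": "range", "bounds": [-10.0, 10.0]},
        {"name": "x2", "type": "range", "bounds": [1e-4, 1.0], "log_scale": True},
        {"name": "mode", "type": "choice", "values": ["a", "b"], "is_ordered": False},
    ],
    parameter_constraints=["x1 + x2 <= 10"],
    objective_name="objective",
    minimize=True,
)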
Example #23
def exp_to_df(
    exp: Experiment,
    metrics: Optional[List[Metric]] = None,
    run_metadata_fields: Optional[List[str]] = None,
    trial_properties_fields: Optional[List[str]] = None,
    **kwargs: Any,
) -> pd.DataFrame:
    """Transforms an experiment to a DataFrame with rows keyed by trial_index
    and arm_name, metrics pivoted into one row. If the pivot results in more than
    one row per arm (or one row per ``arm * map_keys`` combination if ``map_keys`` are
    present), results are omitted and warning is produced. Only supports
    ``Experiment``.

    Transforms an ``Experiment`` into a ``pd.DataFrame``.

    Args:
        exp: An ``Experiment`` that may have pending trials.
        metrics: Override list of metrics to return. Return all metrics if ``None``.
        run_metadata_fields: fields to extract from ``trial.run_metadata`` for trial
            in ``experiment.trials``. If there are multiple arms per trial, these
            fields will be replicated across the arms of a trial.
        trial_properties_fields: fields to extract from ``trial._properties`` for trial
            in ``experiment.trials``. If there are multiple arms per trial, these
            fields will be replicated across the arms of a trial. Output columns names
            will be prepended with ``"trial_properties_"``.
        **kwargs: Custom named arguments, useful for passing complex
            objects from call-site to the `fetch_data` callback.

    Returns:
        DataFrame: A dataframe of inputs, metadata and metrics by trial and arm (and
        ``map_keys``, if present). If no trials are available, returns an empty
        dataframe. If no metric outputs are available, returns a dataframe of inputs and
        metadata.
    """

    # Only plain Experiments are supported; reject MultiTypeExperiment
    if isinstance(exp, MultiTypeExperiment):
        raise ValueError(
            "Cannot transform MultiTypeExperiments to DataFrames.")

    key_components = ["trial_index", "arm_name"]

    # Get each trial-arm with parameters. Records are collected in a list and
    # the DataFrame is built once (`DataFrame.append` is removed in pandas 2.0).
    arm_records = []
    for trial_index, trial in exp.trials.items():
        for arm in trial.arms:
            arm_records.append({
                "arm_name": arm.name,
                "trial_index": trial_index,
                **arm.parameters,
            })
    arms_df = pd.DataFrame(arm_records)

    # Fetch results; in case arms_df is empty, return empty results (legacy behavior)
    data = exp.fetch_data(metrics, **kwargs)
    results = data.df
    if len(arms_df.index) == 0:
        if len(results.index) != 0:
            raise ValueError(
                "exp.fetch_data().df returned more rows than there are experimental "
                "arms. This is an inconsistent experimental state. Please report to "
                "Ax support.")
        return results

    # Ensure trial_index has integer dtype before merging trial-level data
    arms_df["trial_index"] = arms_df["trial_index"].astype(int)

    # Add trial status
    trials = exp.trials.items()
    trial_to_status = {index: trial.status.name for index, trial in trials}
    _merge_trials_dict_with_df(df=arms_df,
                               trials_dict=trial_to_status,
                               column_name="trial_status")

    # Add generation_method, accounting for the generic case that generator_runs is of
    # arbitrary length. Repeated methods within a trial are condensed via `set` and an
    # empty set will yield "Unknown" as the method.
    trial_to_generation_method = {
        trial_index: _get_generation_method_str(trial)
        for trial_index, trial in trials
    }

    _merge_trials_dict_with_df(
        df=arms_df,
        trials_dict=trial_to_generation_method,
        column_name="generation_method",
    )

    # Add any trial properties fields to arms_df
    if trial_properties_fields is not None:
        # add trial._properties fields
        for field in trial_properties_fields:
            trial_to_properties_field = {
                trial_index: (trial._properties[field]
                              if field in trial._properties else None)
                for trial_index, trial in trials
            }
            _merge_trials_dict_with_df(
                df=arms_df,
                trials_dict=trial_to_properties_field,
                column_name="trial_properties_" + field,
            )

    # Add any run_metadata fields to arms_df
    if run_metadata_fields is not None:
        # add run_metadata fields
        for field in run_metadata_fields:
            trial_to_metadata_field = {
                trial_index: (trial.run_metadata[field]
                              if field in trial.run_metadata else None)
                for trial_index, trial in trials
            }
            _merge_trials_dict_with_df(
                df=arms_df,
                trials_dict=trial_to_metadata_field,
                column_name=field,
            )
    exp_df = _merge_results_if_no_duplicates(
        arms_df=arms_df,
        data=data,
        key_components=key_components,
        metrics=metrics or list(exp.metrics.values()),
    )

    return not_none(not_none(exp_df).sort_values(["trial_index"]))
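A short sketch of the two metadata-extraction arguments; `exp`, "job_id", and "source" are assumed names:

# Sketch: extract a run_metadata field and a trial._properties field.
df = exp_to_df(
    exp,
    run_metadata_fields=["job_id"],       # column "job_id"
    trial_properties_fields=["source"],   # column "trial_properties_source"
)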
Example #24
def benchmark_minimize_callable(
    problem: BenchmarkProblem,
    num_trials: int,
    method_name: str,
    replication_index: Optional[int] = None,
) -> Tuple[Experiment, Callable[[List[float]], float]]:
    """
    An interface for evaluating external methods on Ax benchmark problems. The
    arms run and performance will be tracked by Ax, so the external method can
    be evaluated alongside Ax methods.

    It is designed around methods that implement an interface like
    scipy.optimize.minimize. This function will return a callable evaluation
    function that takes in an array of parameter values and returns a float
    objective value. The evaluation function should always be minimized: if the
    benchmark problem is a maximization problem, then the value returned by
    the evaluation function will be negated so it can be used directly by
    methods that minimize. This callable can be given to an external
    minimization function, and Ax will track all of the calls made to it and
    the arms that were evaluated.

    This will also return an Experiment object that will track the arms
    evaluated by the external method in the same way as done for Ax
    internal benchmarks. This function should thus be used for each benchmark
    replication.

    Args:
        problem: The Ax benchmark problem to be used to construct the
            evaluation function.
        num_trials: The maximum number of trials for a benchmark run.
        method_name: Name of the method being tested.
        replication_index: Replicate number, if multiple replicates are being
            run.
    """
    # Some validation
    if isinstance(problem, SimpleBenchmarkProblem):
        raise NonRetryableBenchmarkingError(
            "`SimpleBenchmarkProblem` not supported.")
    if not all(
            isinstance(p, RangeParameter)
            for p in problem.search_space.parameters.values()):
        raise NonRetryableBenchmarkingError(
            "Only continuous search spaces supported.")
    if any(p.log_scale
           for p in problem.search_space.parameters.values()  # pyre-ignore
           ):
        raise NonRetryableBenchmarkingError(
            "Log-scale parameters not supported.")

    # Create Ax experiment
    experiment_name = f"{method_name}_on_{problem.name}"
    if replication_index is not None:
        experiment_name += f"__v{replication_index}"
    experiment = Experiment(
        name=experiment_name,
        search_space=problem.search_space,
        optimization_config=problem.optimization_config,
        runner=SyntheticRunner(),
    )
    max_trials = num_trials  # to be used below

    # Construct the evaluation function
    def evaluation_function(x: List[float]) -> float:
        # Check if we have exhausted the evaluation budget
        if len(experiment.trials) >= max_trials:
            raise ValueError(
                f"Evaluation budget ({max_trials} trials) exhausted.")

        # Create an ObservationFeatures
        param_dict = {
            pname: x[i]
            for i, pname in enumerate(problem.search_space.parameters.keys())
        }
        obsf = ObservationFeatures(parameters=param_dict)  # pyre-ignore
        # Get the time since last call
        num_trials = len(experiment.trials)
        if num_trials == 0:
            gen_time = None
        else:
            previous_ts = experiment.trials[num_trials -
                                            1].time_created.timestamp()
            gen_time = time.time() - previous_ts
        # Create a GeneratorRun
        arms, candidate_metadata_by_arm_signature = gen_arms(
            observation_features=[obsf],
            arms_by_signature=experiment.arms_by_signature)
        gr = GeneratorRun(
            arms=arms,
            gen_time=gen_time,
            candidate_metadata_by_arm_signature=
            candidate_metadata_by_arm_signature,
        )
        # Add it as a trial
        trial = experiment.new_trial().add_generator_run(gr).run()
        # Evaluate function
        df = trial.fetch_data().df
        if len(df) > 1:
            raise Exception(
                "Does not support multiple outcomes")  # pragma: no cover
        obj = float(df["mean"].values[0])
        if not problem.optimization_config.objective.minimize:
            obj = -obj
        return obj

    return experiment, evaluation_function
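A sketch of plugging the returned callable into scipy.optimize.minimize, assuming a continuous BenchmarkProblem `problem` is in scope:

from scipy.optimize import minimize

experiment, f = benchmark_minimize_callable(
    problem=problem, num_trials=50, method_name="nelder_mead"
)
try:
    minimize(f, x0=[0.0] * len(problem.search_space.parameters), method="Nelder-Mead")
except ValueError:
    pass  # raised once the 50-trial evaluation budget is exhausted
# `experiment` now records every arm the external optimizer evaluated.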
Example #25
    def gen(
        self,
        experiment: Experiment,
        data: Optional[Data] = None,
        n: int = 1,
        **kwargs: Any,
    ) -> GeneratorRun:
        """Produce the next points in the experiment."""
        self._set_experiment(experiment=experiment)
        new_arm_signatures = set()
        data = data or experiment.fetch_data()
        if data is not None and not data.df.empty:
            if self._data.df.empty:
                new_data = data.df
            else:
                # Select only the new data to determine how many new arms were
                # evaluated since the generation strategy was last updated with
                # data (find rows that are in `data.df`, but not in `self._data.df`)
                merged = data.df.merge(
                    self._data.df,
                    on=[
                        "arm_name", "trial_index", "metric_name", "mean", "sem"
                    ],
                    how="left",
                    indicator=True,
                )
                new_data = merged[merged["_merge"] == "left_only"]
            # Get arm signatures for each entry in data that the GS hasn't seen yet.
            new_arm_signatures = {
                not_none(experiment.arms_by_name.get(
                    row["arm_name"])).signature
                for _, row in new_data.iterrows()
                if (row["arm_name"] in experiment.arms_by_name
                    and not not_none(experiment.trials.get(
                        row["trial_index"])).status.is_failed)
            }

        enough_observed = (len(self._observed) + len(new_arm_signatures)
                           ) >= self._curr.min_arms_observed
        unlimited_arms = self._curr.num_arms == -1
        enough_generated = (not unlimited_arms
                            and len(self._generated) >= self._curr.num_arms)

        # Check that minimum observed_arms is satisfied if it's enforced.
        if self._curr.enforce_num_arms and enough_generated and not enough_observed:
            raise DataRequiredError(
                "All trials for current model have been generated, but not enough "
                "data has been observed to fit next model. Try again when more data "
                "are available.")
            # TODO[Lena, T44021164]: take into account failed trials. Potentially
            # reduce `_generated` count when a trial mentioned in new data failed.

        lgr = self.last_generator_run

        if enough_generated and enough_observed:
            # Change to the next model.
            self._change_model(experiment=experiment, data=data)
        elif lgr is not None and lgr._model_state_after_gen is not None:
            model_state = not_none(lgr._model_state_after_gen)
            self._set_current_model(experiment=experiment,
                                    data=data,
                                    **model_state)
        else:
            self._set_current_model(experiment=experiment, data=data)

        model = not_none(self._model)
        kwargs = consolidate_kwargs(
            kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
            keywords=get_function_argument_names(not_none(self._model).gen),
        )
        gen_run = model.gen(n=n, **kwargs)

        # If nothing failed, update known data, _generated, and _observed.
        self._data = data
        self._generated.extend([arm.signature for arm in gen_run.arms])
        self._observed.extend(new_arm_signatures)
        self._generator_runs.append(gen_run)
        return gen_run
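The new-data detection above relies on a left-merge anti-join; the same pandas idiom in isolation:

import pandas as pd

old = pd.DataFrame({"arm_name": ["0_0"], "mean": [1.0]})
new = pd.DataFrame({"arm_name": ["0_0", "1_0"], "mean": [1.0, 2.0]})

# indicator=True adds a "_merge" column; rows marked "left_only" appear in
# `new` but not in `old`, i.e. the rows observed since the last update.
merged = new.merge(old, on=["arm_name", "mean"], how="left", indicator=True)
print(merged[merged["_merge"] == "left_only"])  # just the 1_0 row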
Example #26
def get_pareto_optimal_parameters(
    experiment: Experiment,
    generation_strategy: GenerationStrategy,
    use_model_predictions: bool = True,
) -> Optional[Dict[int, Tuple[TParameterization, TModelPredictArm]]]:
    """Identifies the best parameterizations tried in the experiment so far,
    using model predictions if ``use_model_predictions`` is true and using
    observed values from the experiment otherwise. By default, uses model
    predictions to account for observation noise.

    NOTE: The format of this method's output is as follows:
    { trial_index --> (parameterization, (means, covariances)) }, where means
    are a dictionary of form { metric_name --> metric_mean } and covariances
    are a nested dictionary of form
    { one_metric_name --> { another_metric_name: covariance } }.

    Args:
        experiment: Experiment, from which to find Pareto-optimal arms.
        generation_strategy: Generation strategy containing the modelbridge.
        use_model_predictions: Whether to extract the Pareto frontier using
            model predictions or directly observed values. If ``True``,
            the metric means and covariances in this method's output will
            also be based on model predictions and may differ from the
            observed values.

    Returns:
        ``None`` if it was not possible to extract the Pareto frontier,
        otherwise a mapping from trial index to the tuple of:
        - the parameterization of the arm in that trial,
        - two-item tuple of metric means dictionary and covariance matrix
            (model-predicted if ``use_model_predictions=True`` and observed
            otherwise).
    """
    # Validate aspects of the experiment: that it is a MOO experiment and
    # that the current model can be used to produce the Pareto frontier.
    if not not_none(experiment.optimization_config).is_moo_problem:
        raise UnsupportedError(
            "Please use `get_best_parameters` for single-objective problems.")

    moo_optimization_config = checked_cast(MultiObjectiveOptimizationConfig,
                                           experiment.optimization_config)
    if moo_optimization_config.outcome_constraints:
        # TODO[drfreund]: Test this flow and remove error.
        raise NotImplementedError(
            "Support for outcome constraints is currently under development.")

    # Extract or instantiate modelbridge to use for Pareto frontier extraction.
    mb = generation_strategy.model
    if mb is None or not isinstance(mb, MultiObjectiveTorchModelBridge):
        logger.info(
            "Can only extract a Pareto frontier using a multi-objective model bridge"
            f", but currently used model bridge is: {mb} of type {type(mb)}. Will "
            "use `Models.MOO` instead to extract Pareto frontier.")
        mb = checked_cast(
            MultiObjectiveTorchModelBridge,
            Models.MOO(experiment=experiment,
                       data=checked_cast(Data, experiment.lookup_data())),
        )
    else:
        # Make sure the model is up-to-date with the most recent data.
        generation_strategy._set_or_update_current_model(data=None)

    # If objective thresholds are not specified in the optimization config,
    # extract previously inferred ones if possible, or infer them anew if not.
    objective_thresholds_override = None
    if not moo_optimization_config.objective_thresholds:
        lgr = generation_strategy.last_generator_run
        if lgr and lgr.gen_metadata and "objective_thresholds" in lgr.gen_metadata:
            objective_thresholds_override = lgr.gen_metadata[
                "objective_thresholds"]
        else:
            objective_thresholds_override = mb.infer_objective_thresholds(
                search_space=experiment.search_space,
                optimization_config=experiment.optimization_config,
                fixed_features=None,
            )
        logger.info(
            f"Using inferred objective thresholds: {objective_thresholds_override}, "
            "as objective thresholds were not specified as part of the optimization "
            "configuration on the experiment.")

    # Extract the Pareto frontier and format it as follows:
    # { trial_index --> (parameterization, (means, covariances)) }
    pareto_util = predicted_pareto if use_model_predictions else observed_pareto
    pareto_optimal_observations = pareto_util(
        modelbridge=mb, objective_thresholds=objective_thresholds_override)
    return {
        int(not_none(obs.features.trial_index)): (
            obs.features.parameters,
            (obs.data.means_dict, obs.data.covariance_matrix),
        )
        for obs in pareto_optimal_observations
    }
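A usage sketch; `ax_client` is a hypothetical AxClient running a multi-objective experiment:

# Sketch only: `ax_client` is an assumption.
pareto = get_pareto_optimal_parameters(
    experiment=ax_client.experiment,
    generation_strategy=ax_client.generation_strategy,
)
for trial_index, (parameters, (means, covariances)) in (pareto or {}).items():
    print(trial_index, parameters, means)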
Example #27
    def should_stop_trials_early(
        self,
        trial_indices: Set[int],
        experiment: Experiment,
        **kwargs: Dict[str, Any],
    ) -> Dict[int, Optional[str]]:
        """Stop a trial if its performance is in the bottom `percentile_threshold`
        of the trials at the same step.

        Args:
            trial_indices: Indices of candidate trials to consider for early stopping.
            experiment: Experiment that contains the trials and other contextual data.

        Returns:
            A dictionary mapping trial indices that should be early stopped to
            (optional) messages with the associated reason. An empty dictionary
            means no suggested updates to any trial's status.
        """
        if experiment.optimization_config is None:
            raise UnsupportedError(  # pragma: no cover
                "Experiment must have an optimization config in order to use an "
                "early stopping strategy."
            )

        optimization_config = not_none(experiment.optimization_config)
        objective_name = optimization_config.objective.metric.name
        minimize = optimization_config.objective.minimize

        data = experiment.lookup_data(keep_latest_map_values_only=False)
        if data.df.empty:
            logger.info(
                "PercentileEarlyStoppingStrategy received empty data. "
                "Not stopping any trials."
            )
            return {}

        if not isinstance(data, MapData):
            raise ValueError(
                "PercentileEarlyStoppingStrategy expects MapData, but the "
                f"data attached to experiment is of type {type(data)}."
            )

        map_keys = data.map_keys
        if len(map_keys) > 1:
            raise ValueError(  # pragma: no cover
                "PercentileEarlyStoppingStrategy expects MapData with a single "
                "map key, but the data attached to the experiment has multiple: "
                f"{data.map_keys}."
            )
        map_key = map_keys[0]

        df = data.df
        metric_to_aligned_means, _ = align_partial_results(
            df=df,
            progr_key=map_key,
            metrics=[objective_name],
        )
        aligned_means = metric_to_aligned_means[objective_name]
        decisions = {
            trial_index: self.should_stop_trial_early(
                trial_index=trial_index,
                experiment=experiment,
                df=aligned_means,
                percentile_threshold=self.percentile_threshold,
                map_key=map_key,
                minimize=minimize,
            )
            for trial_index in trial_indices
        }
        return {
            trial_index: reason
            for trial_index, (should_stop, reason) in decisions.items()
            if should_stop
        }
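The per-trial decision reduces to comparing a trial's objective value at a progression step against a percentile of all trials at that step; a toy illustration of that comparison (not the strategy's actual implementation):

import numpy as np

# Toy data: objective values of five trials at the same step, lower is better.
values_at_step = np.array([0.10, 0.12, 0.30, 0.55, 0.60])
percentile_threshold = 75  # stop trials performing in the worst 25%
cutoff = np.percentile(values_at_step, percentile_threshold)
candidate_value = values_at_step[-1]
print(candidate_value > cutoff)  # True -> candidate would be stopped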
Example #28
    def _init_experiment_from_sqa(self,
                                  experiment_sqa: SQAExperiment) -> Experiment:
        """First step of conversion within experiment_from_sqa."""
        opt_config, tracking_metrics = self.opt_config_and_tracking_metrics_from_sqa(
            metrics_sqa=experiment_sqa.metrics)
        search_space = self.search_space_from_sqa(
            parameters_sqa=experiment_sqa.parameters,
            parameter_constraints_sqa=experiment_sqa.parameter_constraints,
        )
        if search_space is None:
            raise SQADecodeError(  # pragma: no cover
                "Experiment SearchSpace cannot be None.")
        status_quo = (
            Arm(
                # pyre-fixme[6]: Expected `Dict[str, Optional[Union[bool, float,
                #  int, str]]]` for 1st param but got `Optional[Dict[str,
                #  Optional[Union[bool, float, int, str]]]]`.
                parameters=experiment_sqa.status_quo_parameters,
                name=experiment_sqa.status_quo_name,
            ) if experiment_sqa.status_quo_parameters is not None else None)
        if len(experiment_sqa.runners) == 0:
            runner = None
        elif len(experiment_sqa.runners) == 1:
            runner = self.runner_from_sqa(experiment_sqa.runners[0])
        else:
            raise ValueError(  # pragma: no cover
                "Multiple runners on experiment "
                "only supported for MultiTypeExperiment.")

        # `experiment_sqa.properties` is `sqlalchemy.ext.mutable.MutableDict`
        # so need to convert it to regular dict.
        properties = dict(experiment_sqa.properties or {})
        # Remove 'subclass' from experiment's properties, since it's only
        # used for decoding to the correct experiment subclass in storage.
        subclass = properties.pop(Keys.SUBCLASS, None)
        if subclass == "SimpleExperiment":
            if opt_config is None:
                raise SQADecodeError(  # pragma: no cover
                    "SimpleExperiment must have an optimization config.")
            experiment = SimpleExperiment(
                name=experiment_sqa.name,
                search_space=search_space,
                objective_name=opt_config.objective.metric.name,
                minimize=opt_config.objective.minimize,
                outcome_constraints=opt_config.outcome_constraints,
                status_quo=status_quo,
                properties=properties,
            )
            experiment.description = experiment_sqa.description
            experiment.is_test = experiment_sqa.is_test
        else:
            experiment = Experiment(
                name=experiment_sqa.name,
                description=experiment_sqa.description,
                search_space=search_space,
                optimization_config=opt_config,
                tracking_metrics=tracking_metrics,
                runner=runner,
                status_quo=status_quo,
                is_test=experiment_sqa.is_test,
                properties=properties,
            )
        return experiment
Example #29
    def test_best_point(
        self,
        _mock_gen,
        _mock_best_point,
        _mock_fit,
        _mock_predict,
        _mock_gen_arms,
        _mock_unwrap,
        _mock_obs_from_data,
    ):
        exp = Experiment(search_space=get_search_space_for_range_value(), name="test")
        oc = OptimizationConfig(
            objective=Objective(metric=Metric("a"), minimize=False),
            outcome_constraints=[],
        )
        modelbridge = ArrayModelBridge(
            search_space=get_search_space_for_range_value(),
            model=NumpyModel(),
            transforms=[t1, t2],
            experiment=exp,
            data=Data(),
            optimization_config=oc,
        )

        self.assertEqual(list(modelbridge.transforms.keys()), ["Cast", "t1", "t2"])

        # test check that optimization config is required
        with self.assertRaises(ValueError):
            run = modelbridge.gen(n=1, optimization_config=None)

        # _fit is mocked, which typically sets this.
        modelbridge.outcomes = ["a"]
        run = modelbridge.gen(
            n=1,
            optimization_config=oc,
        )

        arm, predictions = run.best_arm_predictions
        self.assertEqual(arm.parameters, {})
        self.assertEqual(predictions[0], {"m": 1.0})
        self.assertEqual(predictions[1], {"m": {"m": 2.0}})

        model_arm, model_predictions = modelbridge.model_best_point()
        self.assertEqual(model_predictions[0], {"m": 1.0})
        self.assertEqual(model_predictions[1], {"m": {"m": 2.0}})

        # test optimization config validation - raise error when
        # ScalarizedOutcomeConstraint contains a metric that is not in the outcomes
        with self.assertRaises(ValueError):
            run = modelbridge.gen(
                n=1,
                optimization_config=OptimizationConfig(
                    objective=Objective(metric=Metric("a"), minimize=False),
                    outcome_constraints=[
                        ScalarizedOutcomeConstraint(
                            metrics=[Metric("wrong_metric_name")],
                            weights=[1.0],
                            op=ComparisonOp.LEQ,
                            bound=0,
                        )
                    ],
                ),
            )