Example #1
    def testEmptyMetrics(self):
        empty_experiment = Experiment(
            name="test_experiment", search_space=get_search_space()
        )
        self.assertEqual(empty_experiment.num_trials, 0)
        with self.assertRaises(ValueError):
            empty_experiment.fetch_data()
        batch = empty_experiment.new_batch_trial()
        self.assertEqual(empty_experiment.num_trials, 1)
        with self.assertRaises(ValueError):
            batch.fetch_data()
        empty_experiment.add_tracking_metric(Metric(name="some_metric"))
        empty_experiment.attach_data(get_data())
        self.assertFalse(empty_experiment.fetch_data().df.empty)
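
For context, get_data() above is an Ax test helper. The sketch below is a minimal, illustrative Data object of the kind attach_data accepts, assuming the standard Ax Data schema (arm_name, trial_index, metric_name, mean, sem); the concrete values and the metric name are placeholders.

import pandas as pd
from ax.core.data import Data

# Placeholder observation: one row per (arm, metric) measurement.
df = pd.DataFrame(
    {
        "arm_name": ["0_0"],
        "trial_index": [0],
        "metric_name": ["some_metric"],
        "mean": [1.0],
        "sem": [0.1],
    }
)
data = Data(df=df)
# experiment.attach_data(data) then makes experiment.fetch_data() return a non-empty df.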
Example #2
    def gen(
        self,
        experiment: Experiment,
        data: Optional[Data] = None,
        n: int = 1,
        **kwargs: Any,
    ) -> GeneratorRun:
        """Produce the next points in the experiment."""
        self.experiment = experiment
        self._set_model(experiment=experiment, data=data or experiment.fetch_data())
        max_parallelism = self._curr.max_parallelism
        num_running = self.num_running_trials_for_current_step
        if max_parallelism is not None and num_running >= max_parallelism:
            raise MaxParallelismReachedException(
                step=self._curr, num_running=num_running
            )
        model = not_none(self.model)
        generator_run = model.gen(
            n=n,
            **consolidate_kwargs(
                kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
                keywords=get_function_argument_names(model.gen),
            ),
        )
        generator_run._generation_step_index = self._curr.index
        self._generator_runs.append(generator_run)
        return generator_run
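
A minimal sketch of how this gen method is typically driven by calling code, assuming a configured GenerationStrategy instance named generation_strategy and an experiment with a runner attached; the variable names and the exception's import path are assumptions.

from ax.exceptions.generation_strategy import MaxParallelismReachedException

try:
    generator_run = generation_strategy.gen(experiment=experiment, n=1)
except MaxParallelismReachedException:
    # Too many trials from the current generation step are still running.
    generator_run = None

if generator_run is not None:
    trial = experiment.new_trial(generator_run=generator_run)
    trial.run()  # assumes a runner is configured on the experiment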
Example #3
    def testEmptyMetrics(self):
        empty_experiment = Experiment(
            name="test_experiment", search_space=get_search_space()
        )
        self.assertEqual(empty_experiment.num_trials, 0)
        with self.assertRaises(ValueError):
            empty_experiment.fetch_data()
        batch = empty_experiment.new_batch_trial()
        batch.mark_running(no_runner_required=True)
        self.assertEqual(empty_experiment.num_trials, 1)
        with self.assertRaises(ValueError):
            batch.fetch_data()
        empty_experiment.add_tracking_metric(Metric(name="ax_test_metric"))
        self.assertTrue(empty_experiment.fetch_data().df.empty)
        empty_experiment.attach_data(get_data())
        batch.mark_completed()
        self.assertFalse(empty_experiment.fetch_data().df.empty)
Example #4
def _extract_optimization_trace_from_metrics(
        experiment: Experiment) -> np.ndarray:
    names = []
    for trial in experiment.trials.values():
        for i, arm in enumerate(trial.arms):
            reps = int(trial.weights[i]) if isinstance(trial,
                                                       BatchTrial) else 1
            names.extend([arm.name] * reps)
    iters_df = pd.DataFrame({"arm_name": names})
    data_df = experiment.fetch_data(noisy=False).df
    metrics = data_df["metric_name"].unique()
    true_values = {}
    for metric in metrics:
        df_m = data_df[data_df["metric_name"] == metric]
        # Get one row per arm
        df_m = df_m.groupby("arm_name").first().reset_index()
        df_b = pd.merge(iters_df, df_m, how="left", on="arm_name")
        true_values[metric] = df_b["mean"].values
    if isinstance(experiment.optimization_config,
                  MultiObjectiveOptimizationConfig):
        return feasible_hypervolume(
            # pyre-fixme[6]: Expected `OptimizationConfig` for 1st param but got
            #  `Optional[ax.core.optimization_config.OptimizationConfig]`.
            optimization_config=experiment.optimization_config,
            values=true_values,
        )
    return best_feasible_objective(
        # pyre-fixme[6]: Expected `OptimizationConfig` for 1st param but got
        #  `Optional[ax.core.optimization_config.OptimizationConfig]`.
        optimization_config=experiment.optimization_config,
        values=true_values,
    )
Example #5
    def gen(
        self,
        experiment: Experiment,
        data: Optional[Data] = None,
        n: int = 1,
        **kwargs: Any,
    ) -> GeneratorRun:
        """Generate new points, rotating through projections each time."""
        if data is None:
            data = experiment.fetch_data()
        if not isinstance(data, Data):
            raise ValueError(
                "Data fetched from experiment not an instance of PTS-supporting `Data`"
            )
        # Get the next model in the rotation
        i = self.current_iteration % self.k
        data_by_proj = self._filter_data_to_projection(
            experiment=experiment,
            data=data,
            arm_sigs=self.arms_by_proj[i],
        )
        lgr = self.last_generator_run
        # NOTE: May need to `model_class.deserialize_model_state` in the
        # future if using non-readily serializable state.
        model_state = (not_none(lgr._model_state_after_gen) if lgr is not None
                       and lgr._model_state_after_gen is not None else {})

        A, bounds_d = self.projections[i]
        if (data_by_proj is None or len(data_by_proj.df["arm_name"].unique()) <
                self.init_per_proj):
            # Not enough data to switch to GP, use Sobol for initialization
            m = get_rembo_initializer(
                search_space=experiment.search_space,
                A=A.double().numpy(),
                bounds_d=bounds_d,
                **model_state,
            )
        else:
            # We have enough data to switch to GP.
            m = get_REMBO(
                experiment=experiment,
                data=data_by_proj,
                A=A,
                initial_X_d=torch.tensor(self.X_d_by_proj[i],
                                         dtype=self.dtype,
                                         device=self.device),
                bounds_d=bounds_d,
                **self.gp_kwargs,
            )

        self.current_iteration += 1
        # Call gen
        gr = m.gen(n=n)
        self.X_d_by_proj[i].extend(not_none(
            m.model).X_d_gen)  # pyre-ignore[16]
        self.arms_by_proj[i].update(a.signature for a in gr.arms)
        self._generator_runs.append(gr)
        return gr
Example #6
    def testEmptyMetrics(self):
        empty_experiment = Experiment(
            name="test_experiment",
            search_space=get_search_space(),
            default_data_type=DataType.MAP_DATA,
        )
        self.assertEqual(empty_experiment.num_trials, 0)
        empty_experiment.add_tracking_metric(Metric(name="ax_test_metric"))
        self.assertTrue(empty_experiment.fetch_data().df.empty)
        empty_experiment.attach_data(get_map_data())
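
For context, experiments created with default_data_type=DataType.MAP_DATA attach MapData rather than plain Data; its underlying DataFrame carries one or more progression ("map key") columns in addition to the standard Data columns. An illustrative frame is sketched below; the map-key column name ("epoch") and all values are placeholders.

import pandas as pd

# Standard Data columns plus a progression column per observation.
map_df = pd.DataFrame(
    {
        "arm_name": ["0_0", "0_0"],
        "trial_index": [0, 0],
        "metric_name": ["ax_test_metric", "ax_test_metric"],
        "mean": [0.5, 0.7],
        "sem": [0.1, 0.1],
        "epoch": [1, 2],
    }
)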
Example #7
def get_best_raw_objective_point_with_trial_index(
    experiment: Experiment,
    optimization_config: Optional[OptimizationConfig] = None
) -> Tuple[int, TParameterization, Dict[str, Tuple[float, float]]]:
    """Given an experiment, identifies the arm that had the best raw objective,
    based on the data fetched from the experiment.

    Args:
        experiment: Experiment, on which to identify best raw objective arm.
        optimization_config: Optimization config to use in absence or in place of
            the one stored on the experiment.

    Returns:
        Tuple of trial index, parameterization, and a mapping from metric name to
            a tuple of the corresponding objective mean and SEM.
    """
    # pyre-ignore [16]
    if isinstance(experiment.optimization_config.objective, MultiObjective):
        logger.warning(
            "get_best_raw_objective_point is deprecated for multi-objective "
            "optimization. This method will return an arbitrary point on the "
            "pareto frontier.")
    opt_config = optimization_config or experiment.optimization_config
    assert opt_config is not None, (
        "Cannot identify the best point without an optimization config, but no "
        "optimization config was provided on the experiment or as an argument."
    )
    dat = experiment.fetch_data()
    if dat.df.empty:
        raise ValueError(
            "Cannot identify best point if experiment contains no data.")
    objective = opt_config.objective
    if isinstance(objective, ScalarizedObjective):
        best_row = _get_best_row_for_scalarized_objective(df=dat.df,
                                                          objective=objective)
    else:
        best_row = _get_best_feasible_row_for_single_objective(
            df=dat.df,
            optimization_config=opt_config,
            status_quo=experiment.status_quo,
        )
    # pyre-fixme[6]: Expected `str` for 1st param but got `Series`.
    best_arm = experiment.arms_by_name[best_row["arm_name"]]
    best_trial_index = best_row["trial_index"]
    objective_rows = dat.df.loc[(dat.df["arm_name"] == best_arm.name)
                                & (dat.df["trial_index"] == best_trial_index)]
    vals = {
        row["metric_name"]: (row["mean"], row["sem"])
        for _, row in objective_rows.iterrows()
    }

    # pyre-fixme[7]: Expected `int` for 1st param but got `Series`.
    return best_trial_index, not_none(best_arm).parameters, vals
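
An illustrative call of the function above, assuming an experiment that already has attached data and an optimization config; the metric name is a placeholder.

best_trial_index, best_parameters, metric_stats = (
    get_best_raw_objective_point_with_trial_index(experiment=experiment)
)
mean, sem = metric_stats["some_metric"]  # placeholder metric name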
Example #8
    def gen(
        self,
        experiment: Experiment,
        data: Optional[Data] = None,
        n: int = 1,
        **kwargs: Any,
    ) -> GeneratorRun:
        """Generate new points, rotating through projections each time."""
        # Use all data in experiment if none is supplied
        data = data or experiment.fetch_data()

        # Get the next model in the rotation
        i = self.current_iteration % self.k
        data_by_proj = self._filter_data_to_projection(
            experiment=experiment, data=data, arm_sigs=self.arms_by_proj[i])
        lgr = self.last_generator_run
        model_state = (not_none(lgr._model_state_after_gen) if lgr is not None
                       and lgr._model_state_after_gen is not None else {})

        A, bounds_d = self.projections[i]
        if (data_by_proj is None or len(data_by_proj.df["arm_name"].unique()) <
                self.init_per_proj):
            # Not enough data to switch to GP, use Sobol for initialization
            m = get_rembo_initializer(
                search_space=experiment.search_space,
                A=A.double().numpy(),
                bounds_d=bounds_d,
                **model_state,
            )
        else:
            # We have enough data to switch to GP.
            m = get_REMBO(
                experiment=experiment,
                data=data_by_proj,
                A=A,
                initial_X_d=torch.tensor(self.X_d_by_proj[i],
                                         dtype=self.dtype,
                                         device=self.device),
                bounds_d=bounds_d,
                **self.gp_kwargs,
            )

        self.current_iteration += 1
        # Call gen
        gr = m.gen(n=n)
        self.X_d_by_proj[i].extend(not_none(
            m.model).X_d_gen)  # pyre-ignore[16]
        self.arms_by_proj[i].update(a.signature for a in gr.arms)
        return gr
Example #9
def _get_objective_trace_plot(
    experiment: Experiment,
    metric_name: str,
    model_transitions: List[int],
    optimization_direction: Optional[str] = None,
) -> Optional[go.Figure]:
    best_objectives = np.array([experiment.fetch_data().df["mean"]])
    return optimization_trace_single_method_plotly(
        y=best_objectives,
        title="Best objective found vs. # of iterations",
        ylabel=metric_name,
        model_transitions=model_transitions,
        optimization_direction=optimization_direction,
        plot_trial_points=True,
    )
Example #10
def get_best_raw_objective_point(
    experiment: Experiment, optimization_config: Optional[OptimizationConfig] = None
) -> Tuple[TParameterization, Dict[str, Tuple[float, float]]]:
    """Given an experiment, identifies the arm that had the best raw objective,
    based on the data fetched from the experiment.

    Args:
        experiment: Experiment, on which to identify best raw objective arm.
        optimization_config: Optimization config to use in absence or in place of
            the one stored on the experiment.

    Returns:
        Tuple of parameterization and a mapping from metric name to a tuple of
            the corresponding objective mean and SEM.
    """
    dat = experiment.fetch_data()
    if dat.df.empty:
        raise ValueError("Cannot identify best point if experiment contains no data.")
    opt_config = optimization_config or experiment.optimization_config
    assert opt_config is not None, (
        "Cannot identify the best point without an optimization config, but no "
        "optimization config was provided on the experiment or as an argument."
    )
    objective_name = opt_config.objective.metric.name
    objective_rows = dat.df.loc[dat.df["metric_name"] == objective_name]
    if objective_rows.empty:
        raise ValueError(f'No data has been logged for objective "{objective_name}".')
    best_row = (
        objective_rows.loc[objective_rows["mean"].idxmin()]
        if opt_config.objective.minimize
        else objective_rows.loc[objective_rows["mean"].idxmax()]
    )
    best_arm = experiment.arms_by_name.get(best_row["arm_name"])
    objective_rows = dat.df.loc[
        (dat.df["arm_name"] == best_row["arm_name"])
        & (dat.df["trial_index"] == best_row["trial_index"])
    ]
    vals = {
        row["metric_name"]: (row["mean"], row["sem"])
        for _, row in objective_rows.iterrows()
    }
    return not_none(best_arm).parameters, vals
Example #11
def get_best_raw_objective_point(
    experiment: Experiment, optimization_config: Optional[OptimizationConfig] = None
) -> Tuple[TParameterization, Dict[str, Tuple[float, float]]]:
    """Given an experiment, identifies the arm that had the best raw objective,
    based on the data fetched from the experiment.

    Args:
        experiment: Experiment, on which to identify best raw objective arm.
        optimization_config: Optimization config to use in absence or in place of
            the one stored on the experiment.

    Returns:
        Tuple of parameterization and a mapping from metric name to a tuple of
            the corresponding objective mean and SEM.
    """
    opt_config = optimization_config or experiment.optimization_config
    assert opt_config is not None, (
        "Cannot identify the best point without an optimization config, but no "
        "optimization config was provided on the experiment or as an argument."
    )
    dat = experiment.fetch_data()
    if dat.df.empty:
        raise ValueError("Cannot identify best point if experiment contains no data.")
    objective = opt_config.objective
    if isinstance(objective, ScalarizedObjective):
        best_row = _get_best_row_for_scalarized_objective(dat.df, objective)
    else:
        best_row = _get_best_row_for_single_objective(dat.df, objective)
    best_arm = experiment.arms_by_name[best_row["arm_name"]]
    best_trial_index = best_row["trial_index"]
    objective_rows = dat.df.loc[
        (dat.df["arm_name"] == best_arm.name)
        & (dat.df["trial_index"] == best_trial_index)
    ]
    vals = {
        row["metric_name"]: (row["mean"], row["sem"])
        for _, row in objective_rows.iterrows()
    }
    return not_none(best_arm).parameters, vals
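
Usage is the same for both variants of get_best_raw_objective_point shown above; an illustrative call, assuming the experiment already has data and a single (or scalarized) objective.

best_parameters, metric_stats = get_best_raw_objective_point(experiment=experiment)
for metric_name, (mean, sem) in metric_stats.items():
    print(metric_name, mean, sem)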
Example #12
    def _check_validity_and_get_data(
            self, experiment: Experiment) -> Optional[MapData]:
        """Validity checks and returns the `MapData` used for early stopping."""
        if experiment.optimization_config is None:
            raise UnsupportedError(  # pragma: no cover
                "Experiment must have an optimization config in order to use an "
                "early stopping strategy.")

        optimization_config = not_none(experiment.optimization_config)
        objective_name = optimization_config.objective.metric.name

        data = experiment.fetch_data()
        if data.df.empty:
            logger.info(f"{self.__class__.__name__} received empty data. "
                        "Not stopping any trials.")
            return None
        if objective_name not in set(data.df["metric_name"]):
            logger.info(f"{self.__class__.__name__} did not receive data "
                        "from the objective metric. Not stopping any trials.")
            return None

        if not isinstance(data, MapData):
            logger.info(
                f"{self.__class__.__name__} expects MapData, but the "
                f"data attached to experiment is of type {type(data)}. "
                "Not stopping any trials.")
            return None

        data = checked_cast(MapData, data)
        map_keys = data.map_keys
        if len(list(map_keys)) > 1:
            logger.info(
                f"{self.__class__.__name__} expects MapData with a single "
                "map key, but the data attached to the experiment has multiple: "
                f"{data.map_keys}. Not stopping any trials.")
            return None
        return data
Example #13
    def gen(
        self,
        experiment: Experiment,
        data: Optional[Data] = None,
        n: int = 1,
        **kwargs: Any,
    ) -> GeneratorRun:
        """Produce the next points in the experiment."""
        self._set_experiment(experiment=experiment)
        new_arm_signatures = set()
        data = data or experiment.fetch_data()
        if data is not None and not data.df.empty:
            if self._data.df.empty:
                new_data = data.df
            else:
                # Select only the new data to determine how many new arms were
                # evaluated since the generation strategy was last updated with
                # data (find rows that are in `data.df`, but not in `self._data.df`)
                merged = data.df.merge(
                    self._data.df,
                    on=[
                        "arm_name", "trial_index", "metric_name", "mean", "sem"
                    ],
                    how="left",
                    indicator=True,
                )
                new_data = merged[merged["_merge"] == "left_only"]
            # Get arm signatures for each entry in data that the GS hasn't seen yet.
            new_arm_signatures = {
                not_none(experiment.arms_by_name.get(
                    row["arm_name"])).signature
                for _, row in new_data.iterrows()
                if (row["arm_name"] in experiment.arms_by_name
                    and not not_none(experiment.trials.get(
                        row["trial_index"])).status.is_failed)
            }

        enough_observed = (len(self._observed) + len(new_arm_signatures)
                           ) >= self._curr.min_arms_observed
        unlimited_arms = self._curr.num_arms == -1
        enough_generated = (not unlimited_arms
                            and len(self._generated) >= self._curr.num_arms)

        # Check that minimum observed_arms is satisfied if it's enforced.
        if self._curr.enforce_num_arms and enough_generated and not enough_observed:
            raise DataRequiredError(
                "All trials for current model have been generated, but not enough "
                "data has been observed to fit next model. Try again when more data "
                "are available.")
            # TODO[Lena, T44021164]: take into account failed trials. Potentially
            # reduce `_generated` count when a trial mentioned in new data failed.

        lgr = self.last_generator_run

        if enough_generated and enough_observed:
            # Change to the next model.
            self._change_model(experiment=experiment, data=data)
        elif lgr is not None and lgr._model_state_after_gen is not None:
            model_state = not_none(lgr._model_state_after_gen)
            self._set_current_model(experiment=experiment,
                                    data=data,
                                    **model_state)
        else:
            self._set_current_model(experiment=experiment, data=data)

        model = not_none(self._model)
        kwargs = consolidate_kwargs(
            kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
            keywords=get_function_argument_names(not_none(self._model).gen),
        )
        gen_run = model.gen(n=n, **kwargs)

        # If nothing failed, update known data, _generated, and _observed.
        self._data = data
        self._generated.extend([arm.signature for arm in gen_run.arms])
        self._observed.extend(new_arm_signatures)
        self._generator_runs.append(gen_run)
        return gen_run
Example #14
def compute_pareto_frontier(
    experiment: Experiment,
    primary_objective: Metric,
    secondary_objective: Metric,
    data: Optional[Data] = None,
    outcome_constraints: Optional[List[OutcomeConstraint]] = None,
    absolute_metrics: Optional[List[str]] = None,
    num_points: int = 10,
    trial_index: Optional[int] = None,
    chebyshev: bool = True,
) -> ParetoFrontierResults:
    """Compute the Pareto frontier between two objectives. For experiments
    with batch trials, a trial index or data object must be provided.

    Args:
        experiment: The experiment to compute a pareto frontier for.
        primary_objective: The primary objective to optimize.
        secondary_objective: The secondary objective against which
            to trade off the primary objective.
        outcome_constraints: Outcome
            constraints to be respected by the optimization. Can only contain
            constraints on metrics that are not primary or secondary objectives.
        absolute_metrics: List of outcome metrics that
            should NOT be relativized w.r.t. the status quo (all other outcomes
            will be in % relative to status_quo).
        num_points: The number of points to compute on the
            Pareto frontier.
        chebyshev: Whether to use augmented_chebyshev_scalarization
            when computing Pareto Frontier points.

    Returns:
        ParetoFrontierResults: A NamedTuple with the following fields:
            - param_dicts: The parameter dicts of the
                points generated on the Pareto Frontier.
            - means: The posterior mean predictions of
                the model for each metric (same order as the param dicts).
            - sems: The posterior sem predictions of
                the model for each metric (same order as the param dicts).
            - primary_metric: The name of the primary metric.
            - secondary_metric: The name of the secondary metric.
            - absolute_metrics: List of outcome metrics that
                are NOT be relativized w.r.t. the status quo (all other metrics
                are in % relative to status_quo).
    """
    # TODO(jej): Implement using MultiObjectiveTorchModelBridge's _pareto_frontier
    model_gen_options = {
        "acquisition_function_kwargs": {
            "chebyshev_scalarization": chebyshev
        }
    }

    if (trial_index is None and data is None and any(
            isinstance(t, BatchTrial) for t in experiment.trials.values())):
        raise UnsupportedError(
            "Must specify trial index or data for experiment with batch trials"
        )
    absolute_metrics = [] if absolute_metrics is None else absolute_metrics
    for metric in absolute_metrics:
        if metric not in experiment.metrics:
            raise ValueError(f"Model was not fit on metric `{metric}`")

    if outcome_constraints is None:
        outcome_constraints = []
    else:
        # ensure we don't constrain an objective
        _validate_outcome_constraints(
            outcome_constraints=outcome_constraints,
            primary_objective=primary_objective,
            secondary_objective=secondary_objective,
        )

    # build posterior mean model
    if not data:
        try:
            data = (experiment.trials[trial_index].fetch_data()
                    if trial_index is not None else experiment.fetch_data())
        except Exception as e:
            logger.info(f"Could not fetch data from experiment or trial: {e}")

    oc = _build_new_optimization_config(
        weights=np.array([0.5, 0.5]),
        primary_objective=primary_objective,
        secondary_objective=secondary_objective,
        outcome_constraints=outcome_constraints,
    )
    model = Models.MOO(
        experiment=experiment,
        data=data,
        acqf_constructor=get_PosteriorMean,
        optimization_config=oc,
    )

    status_quo = experiment.status_quo
    if status_quo:
        try:
            status_quo_prediction = model.predict([
                ObservationFeatures(
                    parameters=status_quo.parameters,
                    # pyre-fixme [6]: Expected `Optional[np.int64]` for trial_index
                    trial_index=trial_index,
                )
            ])
        except ValueError as e:
            logger.warning(f"Could not predict OOD status_quo outcomes: {e}")
            status_quo = None
            status_quo_prediction = None
    else:
        status_quo_prediction = None

    param_dicts: List[TParameterization] = []

    # Construct weightings with linear angular spacing.
    # TODO: Verify whether 0, 1 weights cause problems because of subset_model.
    alpha = np.linspace(0 + 0.01, np.pi / 2 - 0.01, num_points)
    primary_weight = (-1 if primary_objective.lower_is_better else
                      1) * np.cos(alpha)
    secondary_weight = (-1 if secondary_objective.lower_is_better else
                        1) * np.sin(alpha)
    weights_list = np.stack([primary_weight, secondary_weight]).transpose()
    for weights in weights_list:
        oc = _build_new_optimization_config(
            weights=weights,
            primary_objective=primary_objective,
            secondary_objective=secondary_objective,
            outcome_constraints=outcome_constraints,
        )
        # TODO: (jej) T64002590 Let this serve as a starting point for optimization.
        # ex. Add global spacing criterion. Implement on BoTorch side.
        # pyre-fixme [6]: Expected different type for model_gen_options
        run = model.gen(1,
                        model_gen_options=model_gen_options,
                        optimization_config=oc)
        param_dicts.append(run.arms[0].parameters)

    # Call predict on points to get their decomposed metrics.
    means, cov = model.predict(
        [ObservationFeatures(parameters) for parameters in param_dicts])

    return _extract_pareto_frontier_results(
        param_dicts=param_dicts,
        means=means,
        variances=cov,
        primary_metric=primary_objective.name,
        secondary_metric=secondary_objective.name,
        absolute_metrics=absolute_metrics,
        outcome_constraints=outcome_constraints,
        status_quo_prediction=status_quo_prediction,
    )
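
An illustrative call of compute_pareto_frontier; the metric names are placeholders and would normally be looked up from the experiment's own metrics.

frontier = compute_pareto_frontier(
    experiment=experiment,
    primary_objective=experiment.metrics["metric_a"],  # placeholder name
    secondary_objective=experiment.metrics["metric_b"],  # placeholder name
    num_points=20,
)
# frontier.param_dicts, frontier.means, and frontier.sems can then be inspected or plotted.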
Example #15
def _extract_asynchronous_optimization_trace(
    experiment: Experiment,
    start_time: float,
    end_time: float,
    delta_t: float,
    completed_time_key: str,
    include_only_completed_trials: bool,
) -> np.ndarray:
    """Extract optimization trace for an asynchronous benchmark run. This involves
    getting the `completed_time` from the trial `run_metadata`, as described by
    the `completed_time_key`. From the `start_time`, `end_time`, and `delta_t`
    arguments, a sequence of times is constructed. The returned optimization trace
    is the best achieved value so far for each time, amongst completed (or early
    stopped) trials.

    Args:
        experiment: The experiment from which to generate results.
        start_time: The starting time.
        end_time: The ending time.
        delta_t: The increment between successive time points.
        completed_time_key: The key from which we look up completed run times
            from trial `run_metadata`.
        include_only_completed_trials: Include results only from completed trials.
            This will ignore trials that were early stopped.

    Returns:
        An array representing the optimization trace as a function of time.
    """
    if any(
            isinstance(trial, BatchTrial)
            for trial in experiment.trials.values()):
        raise NotImplementedError("Batched trials are not yet supported.")

    def get_completed_time(row):
        time = experiment.trials[
            row.trial_index].run_metadata[completed_time_key]
        return pd.Series({"completed_time": time})

    if include_only_completed_trials:
        completed_trials = experiment.trial_indices_by_status[
            TrialStatus.COMPLETED]
        data_df = experiment.fetch_trials_data(trial_indices=completed_trials,
                                               noisy=False).df
    else:
        data_df = experiment.fetch_data(noisy=False).df

    minimize = experiment.optimization_config.objective.minimize  # pyre-ignore[16]
    num_periods_running = int((end_time - start_time) // delta_t + 1)
    # TODO: Currently, the timestamps generated below must exactly match the
    # `completed_time` column
    iters_df = pd.DataFrame({
        "completed_time":
        np.arange(num_periods_running) * delta_t + start_time
    })
    true_values = {}
    for metric, df_m in data_df.groupby("metric_name"):
        df_m = data_df[data_df["metric_name"] == metric]

        # only keep the last data point for each arm
        df_m = (df_m.sort_values(["timestamp"],
                                 ascending=True).groupby("arm_name").tail(n=1))

        # get completed times from run metadata
        df_m["completed_time"] = df_m.apply(get_completed_time, axis=1)

        # for trials that completed at the same time, keep only the best
        df_m_g = df_m.groupby("completed_time")
        df_m = (df_m_g.min() if minimize else df_m_g.max()).reset_index()

        # take cumulative best wrt the completed time
        df_m = df_m.sort_index()
        df_m["mean"] = df_m["mean"].cummin(
        ) if minimize else df_m["mean"].cummax()
        df_b = pd.merge(iters_df, df_m, how="left", on="completed_time")

        # replace nans with Infs, which can be handled by `best_feasible_objective`
        true_values[metric] = df_b["mean"].fillna(
            np.Inf if minimize else -np.Inf)
    return best_feasible_objective(
        # pyre-fixme[6]: Expected `OptimizationConfig` for 1st param but got
        #  `Optional[ax.core.optimization_config.OptimizationConfig]`.
        optimization_config=experiment.optimization_config,
        values=true_values,
    )
Example #16
def exp_to_df(
    exp: Experiment,
    metrics: Optional[List[Metric]] = None,
    run_metadata_fields: Optional[List[str]] = None,
    trial_properties_fields: Optional[List[str]] = None,
    **kwargs: Any,
) -> pd.DataFrame:
    """Transforms an experiment to a DataFrame. Only supports Experiment and
    SimpleExperiment.

    Transforms an Experiment into a dataframe with rows keyed by trial_index
    and arm_name, metrics pivoted into one row.

    Args:
        exp: An Experiment that may have pending trials.
        metrics: Override list of metrics to return. Return all metrics if None.
        run_metadata_fields: fields to extract from trial.run_metadata for trial
            in experiment.trials. If there are multiple arms per trial, these
            fields will be replicated across the arms of a trial.
        trial_properties_fields: fields to extract from trial._properties for trial
            in experiment.trials. If there are multiple arms per trial, these fields
            will be replicated across the arms of a trial. Output column names will be
            prepended with "trial_properties_".

        **kwargs: Custom named arguments, useful for passing complex
            objects from call-site to the `fetch_data` callback.

    Returns:
        DataFrame: A dataframe of inputs, metadata and metrics by trial and arm. If
        no trials are available, returns an empty dataframe. If no metric outputs are
        available, returns a dataframe of inputs and metadata.
    """
    def prep_return(df: pd.DataFrame, drop_col: str,
                    sort_by: List[str]) -> pd.DataFrame:
        return not_none(
            not_none(df.drop(drop_col, axis=1)).sort_values(sort_by))

    def merge_trials_dict_with_df(df: pd.DataFrame, trials_dict: Dict[int,
                                                                      Any],
                                  column_name: str) -> None:
        """Add a column ``column_name`` to a DataFrame ``df`` containing a column
        ``trial_index``. Each value of the new column is given by the element of
        ``trials_dict`` indexed by ``trial_index``.

        Args:
            df: Pandas DataFrame with column ``trial_index``, to be appended with a new
                column.
            trials_dict: Dict mapping each ``trial_index`` to a value. The new column of
                df will be populated with the value corresponding with the
                ``trial_index`` of each row.
            column_name: Name of the column to be appended to ``df``.
        """

        if "trial_index" not in df.columns:
            raise ValueError("df must have trial_index column")
        if any(trials_dict.values()):  # field present for any trial
            if not all(trials_dict.values()):  # not present for all trials
                logger.warning(
                    f"Column {column_name} missing for some trials. "
                    "Filling with None when missing.")
            df[column_name] = [
                trials_dict[trial_index] for trial_index in df.trial_index
            ]
        else:
            logger.warning(f"Column {column_name} missing for all trials. "
                           "Not appending column.")

    def get_generation_method_str(trial: BaseTrial) -> str:
        generation_methods = {
            not_none(generator_run._model_key)
            for generator_run in trial.generator_runs
            if generator_run._model_key is not None
        }
        # add "Manual" if any generator_runs are manual
        if any(generator_run.generator_run_type == GeneratorRunType.MANUAL.name
               for generator_run in trial.generator_runs):
            generation_methods.add("Manual")
        return ", ".join(
            generation_methods) if generation_methods else "Unknown"

    # Accept Experiment and SimpleExperiment
    if isinstance(exp, MultiTypeExperiment):
        raise ValueError(
            "Cannot transform MultiTypeExperiments to DataFrames.")

    key_components = ["trial_index", "arm_name"]

    # Get each trial-arm with parameters
    arms_df = pd.DataFrame()
    for trial_index, trial in exp.trials.items():
        for arm in trial.arms:
            arms_df = arms_df.append(
                {
                    "arm_name": arm.name,
                    "trial_index": trial_index,
                    **arm.parameters
                },
                ignore_index=True,
            )

    # Fetch results; in case arms_df is empty, return empty results (legacy behavior)
    results = exp.fetch_data(metrics, **kwargs).df
    if len(arms_df.index) == 0:
        if len(results.index) != 0:
            raise ValueError(
                "exp.fetch_data().df returned more rows than there are experimental "
                "arms. This is an inconsistent experimental state. Please report to "
                "Ax support.")
        return results

    # Create key column from key_components
    arms_df["trial_index"] = arms_df["trial_index"].astype(int)
    key_col = "-".join(key_components)
    key_vals = arms_df[key_components[0]].astype("str") + arms_df[
        key_components[1]].astype("str")
    arms_df[key_col] = key_vals

    # Add trial status
    trials = exp.trials.items()
    trial_to_status = {index: trial.status.name for index, trial in trials}
    merge_trials_dict_with_df(df=arms_df,
                              trials_dict=trial_to_status,
                              column_name="trial_status")

    # Add generation_method, accounting for the generic case that generator_runs is of
    # arbitrary length. Repeated methods within a trial are condensed via `set` and an
    # empty set will yield "Unknown" as the method.
    trial_to_generation_method = {
        trial_index: get_generation_method_str(trial)
        for trial_index, trial in trials
    }

    merge_trials_dict_with_df(
        df=arms_df,
        trials_dict=trial_to_generation_method,
        column_name="generation_method",
    )

    # Add any trial properties fields to arms_df
    if trial_properties_fields is not None:
        # add trial._properties fields
        for field in trial_properties_fields:
            trial_to_properties_field = {
                trial_index: (trial._properties[field]
                              if field in trial._properties else None)
                for trial_index, trial in trials
            }
            merge_trials_dict_with_df(
                df=arms_df,
                trials_dict=trial_to_properties_field,
                column_name="trial_properties_" + field,
            )

    # Add any run_metadata fields to arms_df
    if run_metadata_fields is not None:
        # add run_metadata fields
        for field in run_metadata_fields:
            trial_to_metadata_field = {
                trial_index: (trial.run_metadata[field]
                              if field in trial.run_metadata else None)
                for trial_index, trial in trials
            }
            merge_trials_dict_with_df(
                df=arms_df,
                trials_dict=trial_to_metadata_field,
                column_name=field,
            )

    if len(results.index) == 0:
        logger.info(
            f"No results present for the specified metrics `{metrics}`. "
            "Returning arm parameters and metadata only.")
        exp_df = arms_df
    elif not all(col in results.columns for col in key_components):
        logger.warning(
            f"At least one of key columns `{key_components}` not present in results df "
            f"`{results}`. Returning arm parameters and metadata only.")
        exp_df = arms_df
    else:
        # prepare results for merge
        key_vals = results[key_components[0]].astype("str") + results[
            key_components[1]].astype("str")
        results[key_col] = key_vals
        metric_vals = results.pivot(index=key_col,
                                    columns="metric_name",
                                    values="mean").reset_index()

        # dedupe results by key_components
        metadata = results[key_components + [key_col]].drop_duplicates()
        metrics_df = pd.merge(metric_vals, metadata, on=key_col)

        # merge and return
        exp_df = pd.merge(metrics_df,
                          arms_df,
                          on=key_components + [key_col],
                          how="outer")
    return prep_return(df=exp_df, drop_col=key_col, sort_by=["arm_name"])
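
An illustrative call of the function above; the run_metadata field name is a placeholder.

df = exp_to_df(exp=experiment, run_metadata_fields=["some_metadata_field"])
print(df[["trial_index", "arm_name", "trial_status", "generation_method"]].head())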
Example #17
def exp_to_df(
    exp: Experiment,
    metrics: Optional[List[Metric]] = None,
    run_metadata_fields: Optional[List[str]] = None,
    trial_properties_fields: Optional[List[str]] = None,
    **kwargs: Any,
) -> pd.DataFrame:
    """Transforms an experiment to a DataFrame with rows keyed by trial_index
    and arm_name, metrics pivoted into one row. If the pivot results in more than
    one row per arm (or one row per ``arm * map_keys`` combination if ``map_keys`` are
    present), results are omitted and a warning is produced. Only supports
    ``Experiment``.

    Transforms an ``Experiment`` into a ``pd.DataFrame``.

    Args:
        exp: An ``Experiment`` that may have pending trials.
        metrics: Override list of metrics to return. Return all metrics if ``None``.
        run_metadata_fields: fields to extract from ``trial.run_metadata`` for trial
            in ``experiment.trials``. If there are multiple arms per trial, these
            fields will be replicated across the arms of a trial.
        trial_properties_fields: fields to extract from ``trial._properties`` for trial
            in ``experiment.trials``. If there are multiple arms per trial, these
            fields will be replicated across the arms of a trial. Output column names
            will be prepended with ``"trial_properties_"``.
        **kwargs: Custom named arguments, useful for passing complex
            objects from call-site to the `fetch_data` callback.

    Returns:
        DataFrame: A dataframe of inputs, metadata and metrics by trial and arm (and
        ``map_keys``, if present). If no trials are available, returns an empty
        dataframe. If no metric outputs are available, returns a dataframe of inputs and
        metadata.
    """

    # Accept Experiment and SimpleExperiment
    if isinstance(exp, MultiTypeExperiment):
        raise ValueError(
            "Cannot transform MultiTypeExperiments to DataFrames.")

    key_components = ["trial_index", "arm_name"]

    # Get each trial-arm with parameters
    arms_df = pd.DataFrame()
    for trial_index, trial in exp.trials.items():
        for arm in trial.arms:
            arms_df = arms_df.append(
                {
                    "arm_name": arm.name,
                    "trial_index": trial_index,
                    **arm.parameters
                },
                ignore_index=True,
            )

    # Fetch results; in case arms_df is empty, return empty results (legacy behavior)
    data = exp.fetch_data(metrics, **kwargs)
    results = data.df
    if len(arms_df.index) == 0:
        if len(results.index) != 0:
            raise ValueError(
                "exp.fetch_data().df returned more rows than there are experimental "
                "arms. This is an inconsistent experimental state. Please report to "
                "Ax support.")
        return results

    # Create key column from key_components
    arms_df["trial_index"] = arms_df["trial_index"].astype(int)

    # Add trial status
    trials = exp.trials.items()
    trial_to_status = {index: trial.status.name for index, trial in trials}
    _merge_trials_dict_with_df(df=arms_df,
                               trials_dict=trial_to_status,
                               column_name="trial_status")

    # Add generation_method, accounting for the generic case that generator_runs is of
    # arbitrary length. Repeated methods within a trial are condensed via `set` and an
    # empty set will yield "Unknown" as the method.
    trial_to_generation_method = {
        trial_index: _get_generation_method_str(trial)
        for trial_index, trial in trials
    }

    _merge_trials_dict_with_df(
        df=arms_df,
        trials_dict=trial_to_generation_method,
        column_name="generation_method",
    )

    # Add any trial properties fields to arms_df
    if trial_properties_fields is not None:
        # add trial._properties fields
        for field in trial_properties_fields:
            trial_to_properties_field = {
                trial_index: (trial._properties[field]
                              if field in trial._properties else None)
                for trial_index, trial in trials
            }
            _merge_trials_dict_with_df(
                df=arms_df,
                trials_dict=trial_to_properties_field,
                column_name="trial_properties_" + field,
            )

    # Add any run_metadata fields to arms_df
    if run_metadata_fields is not None:
        # add run_metadata fields
        for field in run_metadata_fields:
            trial_to_metadata_field = {
                trial_index: (trial.run_metadata[field]
                              if field in trial.run_metadata else None)
                for trial_index, trial in trials
            }
            _merge_trials_dict_with_df(
                df=arms_df,
                trials_dict=trial_to_metadata_field,
                column_name=field,
            )
    exp_df = _merge_results_if_no_duplicates(
        arms_df=arms_df,
        data=data,
        key_components=key_components,
        metrics=metrics or list(exp.metrics.values()),
    )

    return not_none(not_none(exp_df).sort_values(["trial_index"]))
Example #18
def get_standard_plots(
    experiment: Experiment,
    model: Optional[ModelBridge],
    data: Optional[Data] = None,
    model_transitions: Optional[List[int]] = None,
    true_objective_metric_name: Optional[str] = None,
) -> List[go.Figure]:
    """Extract standard plots for single-objective optimization.

    Extracts a list of plots from an ``Experiment`` and ``ModelBridge`` of general
    interest to an Ax user. Currently not supported are
    - TODO: multi-objective optimization
    - TODO: ChoiceParameter plots

    Args:
        - experiment: The ``Experiment`` from which to obtain standard plots.
        - model: The ``ModelBridge`` used to suggest trial parameters.
        - data: If specified, data to which to fit the model before generating plots.
        - model_transitions: The arm numbers at which shifts in generation_strategy
            occur.

    Returns:
        - a plot of objective value vs. trial index, to show experiment progression
        - a plot of objective value vs. range parameter values, only included if the
          model associated with generation_strategy can create predictions. This
          consists of:

            - a plot_slice plot if the search space contains one range parameter
            - an interact_contour plot if the search space contains multiple
              range parameters

    """
    if (true_objective_metric_name is not None
            and true_objective_metric_name not in experiment.metrics.keys()):
        raise ValueError(
            f"true_objective_metric_name='{true_objective_metric_name}' is not present "
            f"in experiment.metrics={experiment.metrics}. Please add a valid "
            "true_objective_metric_name or remove the optional parameter to get "
            "standard plots.")

    objective = not_none(experiment.optimization_config).objective
    if isinstance(objective, ScalarizedObjective):
        logger.warning(
            "get_standard_plots does not currently support ScalarizedObjective "
            "optimization experiments. Returning an empty list.")
        return []

    if data is None:
        data = experiment.fetch_data()

    if data.df.empty:
        logger.info(
            f"Experiment {experiment} does not yet have data, nothing to plot."
        )
        return []

    output_plot_list = []
    output_plot_list.extend(
        _get_objective_trace_plot(
            experiment=experiment,
            data=data,
            model_transitions=model_transitions
            if model_transitions is not None else [],
            true_objective_metric_name=true_objective_metric_name,
        ))

    # Objective vs. parameter plot requires a `Model`, so add it only if model
    # is already available. In cases where initially custom trials are attached,
    # model might not yet be set on the generation strategy.
    if model:
        # TODO: Check if model can predict in favor of try/catch.
        try:
            if true_objective_metric_name is not None:
                output_plot_list.append(
                    _objective_vs_true_objective_scatter(
                        model=model,
                        objective_metric_name=objective.metric_names[0],
                        true_objective_metric_name=true_objective_metric_name,
                    ))
            output_plot_list.extend(
                _get_objective_v_param_plots(
                    experiment=experiment,
                    model=model,
                ))
            output_plot_list.extend(_get_cross_validation_plots(model=model))
            feature_importance_plot = plot_feature_importance_by_feature_plotly(
                model=model,
                relative=False,
                caption=FEATURE_IMPORTANCE_CAPTION)
            feature_importance_plot.layout.title = "[ADVANCED] " + str(
                # pyre-fixme[16]: go.Figure has no attribute `layout`
                feature_importance_plot.layout.title.text)
            output_plot_list.append(feature_importance_plot)
            output_plot_list.append(
                interact_fitted_plotly(model=model, rel=False))
        except NotImplementedError:
            # Model does not implement `predict` method.
            pass

    return [plot for plot in output_plot_list if plot is not None]
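
An illustrative way to render the returned figures, assuming a fitted generation strategy; since get_standard_plots returns plotly Figure objects, each can be shown directly.

plots = get_standard_plots(
    experiment=experiment,
    model=generation_strategy.model,  # assumes a fitted GenerationStrategy
)
for fig in plots:
    fig.show()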
Example #19
def exp_to_df(
    exp: Experiment,
    metrics: Optional[List[Metric]] = None,
    key_components: Optional[List[str]] = None,
    run_metadata_fields: Optional[List[str]] = None,
    **kwargs: Any,
) -> pd.DataFrame:
    """Transforms an experiment to a DataFrame. Only supports Experiment and
    SimpleExperiment.

    Transforms an Experiment into a dataframe with rows keyed by trial_index
    and arm_name, metrics pivoted into one row.

    Args:
        exp: An Experiment that may have pending trials.
        metrics: Override list of metrics to return. Return all metrics if None.
        key_components: fields that combine to make a unique key corresponding
            to rows, similar to the list of fields passed to a GROUP BY.
            Defaults to ['arm_name', 'trial_index'].
        run_metadata_fields: fields to extract from trial.run_metadata for trial
            in experiment.trials. If there are multiple arms per trial, these
            fields will be replicated across the arms of a trial.
        **kwargs: Custom named arguments, useful for passing complex
            objects from call-site to the `fetch_data` callback.

    Returns:
        DataFrame: A dataframe of inputs and metrics by trial and arm.
    """

    def prep_return(
        df: pd.DataFrame, drop_col: str, sort_by: List[str]
    ) -> pd.DataFrame:
        return not_none(not_none(df.drop(drop_col, axis=1)).sort_values(sort_by))

    key_components = key_components or ["trial_index", "arm_name"]

    # Accept Experiment and SimpleExperiment
    if isinstance(exp, MultiTypeExperiment):
        raise ValueError("Cannot transform MultiTypeExperiments to DataFrames.")

    results = exp.fetch_data(metrics, **kwargs).df
    if len(results.index) == 0:  # Handle empty case
        return results

    # create key column from key_components
    key_col = "-".join(key_components)
    key_vals = results[key_components[0]].astype("str")
    for key in key_components[1:]:
        key_vals = key_vals + results[key].astype("str")
    results[key_col] = key_vals

    # pivot dataframe from long to wide
    metric_vals = results.pivot(
        index=key_col, columns="metric_name", values="mean"
    ).reset_index()

    # dedupe results by key_components
    metadata = results[key_components + [key_col]].drop_duplicates()
    metric_and_metadata = pd.merge(metric_vals, metadata, on=key_col)

    # get params of each arm and merge with deduped results
    arm_names_and_params = pd.DataFrame(
        [{"arm_name": name, **arm.parameters} for name, arm in exp.arms_by_name.items()]
    )
    exp_df = pd.merge(metric_and_metadata, arm_names_and_params, on="arm_name")

    # add trial status
    trials = exp.trials.items()
    trial_to_status = {index: trial.status.name for index, trial in trials}
    exp_df["trial_status"] = [trial_to_status[key] for key in exp_df.trial_index]

    # if no run_metadata fields are requested, return exp_df so far
    if run_metadata_fields is None:
        return prep_return(df=exp_df, drop_col=key_col, sort_by=key_components)
    if not isinstance(run_metadata_fields, list):
        raise ValueError("run_metadata_fields must be List[str] or None.")

    # add additional run_metadata fields
    for field in run_metadata_fields:
        trial_to_metadata_field = {
            index: (trial.run_metadata[field] if field in trial.run_metadata else None)
            for index, trial in trials
        }
        if any(trial_to_metadata_field.values()):  # field present for any trial
            if not all(trial_to_metadata_field.values()):  # not present for all trials
                logger.warning(
                    f"Field {field} missing for some trials' run_metadata. "
                    "Returning None when missing."
                )
            exp_df[field] = [trial_to_metadata_field[key] for key in exp_df.trial_index]
        else:
            logger.warning(
                f"Field {field} missing for all trials' run_metadata. "
                "Not appending column."
            )
    return prep_return(df=exp_df, drop_col=key_col, sort_by=key_components)
Example #20
def get_standard_plots(
    experiment: Experiment, generation_strategy: GenerationStrategy
) -> List[go.Figure]:
    """Extract standard plots for single-objective optimization.

    Extracts a list of plots from an Experiment and GenerationStrategy of general
    interest to an Ax user. Currently not supported are
    - TODO: multi-objective optimization
    - TODO: ChoiceParameter plots

    Args:
        - experiment: the Experiment from which to obtain standard plots.
        - generation_strategy: the GenerationStrategy used to suggest trial parameters
          in experiment

    Returns:
        - a plot of objective value vs. trial index, to show experiment progression
        - a plot of objective value vs. range parameter values, only included if the
          model associated with generation_strategy can create predictions. This
          consists of:

            - a plot_slice plot if the search space contains one range parameter
            - an interact_contour plot if the search space contains multiple
              range parameters

    """

    objective = not_none(experiment.optimization_config).objective
    if isinstance(objective, MultiObjective):
        logger.warning(
            "get_standard_plots does not currently support MultiObjective "
            "optimization experiments. Returning an empty list."
        )
        return []
    if isinstance(objective, ScalarizedObjective):
        logger.warning(
            "get_standard_plots does not currently support ScalarizedObjective "
            "optimization experiments. Returning an empty list."
        )
        return []

    if experiment.fetch_data().df.empty:
        logger.info(f"Experiment {experiment} does not yet have data, nothing to plot.")
        return []

    output_plot_list = []
    output_plot_list.append(
        _get_objective_trace_plot(
            experiment=experiment,
            metric_name=not_none(experiment.optimization_config).objective.metric.name,
            model_transitions=generation_strategy.model_transitions,
            optimization_direction=(
                "minimize"
                if not_none(experiment.optimization_config).objective.minimize
                else "maximize"
            ),
        )
    )

    try:
        output_plot_list.append(
            _get_objective_v_param_plot(
                search_space=experiment.search_space,
                model=not_none(generation_strategy.model),
                metric_name=not_none(
                    experiment.optimization_config
                ).objective.metric.name,
                trials=experiment.trials,
            )
        )
    except NotImplementedError:
        # Model does not implement `predict` method.
        pass

    return [plot for plot in output_plot_list if plot is not None]
Example #21
def get_observed_pareto_frontiers(
    experiment: Experiment,
    data: Optional[Data] = None,
    rel: bool = True,
) -> List[ParetoFrontierResults]:
    """
    Find all Pareto points from an experiment.

    Uses only values as observed in the data; no modeling is involved. Makes no
    assumption about the search space or types of parameters. If "data" is provided,
    it will be used; otherwise, all data attached to the experiment will be used.

    Uses all arms present in data; does not filter according to experiment
    search space.

    Assumes experiment has a multiobjective optimization config from which the
    objectives and outcome constraints will be extracted.

    Will generate a ParetoFrontierResults for every pair of metrics in the experiment's
    multiobjective optimization config.
    """
    if data is None:
        data = experiment.fetch_data()
    if experiment.optimization_config is None:
        raise ValueError("Experiment must have an optimization config")
    mb = get_tensor_converter_model(experiment=experiment, data=data)
    pareto_observations = observed_pareto_frontier(modelbridge=mb)
    # Convert to ParetoFrontierResults
    metric_names = [
        metric.name for metric in
        experiment.optimization_config.objective.metrics  # pyre-ignore
    ]
    pfr_means = {name: [] for name in metric_names}
    pfr_sems = {name: [] for name in metric_names}

    for obs in pareto_observations:
        for i, name in enumerate(obs.data.metric_names):
            pfr_means[name].append(obs.data.means[i])
            pfr_sems[name].append(np.sqrt(obs.data.covariance[i, i]))

    # Relativize as needed
    if rel and experiment.status_quo is not None:
        # Get status quo values
        sq_df = data.df[data.df["arm_name"] ==
                        experiment.status_quo.name  # pyre-ignore
                        ]
        sq_df = sq_df.to_dict(orient="list")  # pyre-ignore
        sq_means = {}
        sq_sems = {}
        for i, metric in enumerate(sq_df["metric_name"]):
            sq_means[metric] = sq_df["mean"][i]
            sq_sems[metric] = sq_df["sem"][i]
        # Relativize
        for name in metric_names:
            if np.isnan(sq_sems[name]) or np.isnan(pfr_sems[name]).any():
                # Just relativize means
                pfr_means[name] = [(mu / sq_means[name] - 1) * 100
                                   for mu in pfr_means[name]]
            else:
                # Use delta method
                pfr_means[name], pfr_sems[name] = relativize(
                    means_t=pfr_means[name],
                    sems_t=pfr_sems[name],
                    mean_c=sq_means[name],
                    sem_c=sq_sems[name],
                    as_percent=True,
                )
        absolute_metrics = []
    else:
        absolute_metrics = metric_names

    objective_thresholds = {}
    if experiment.optimization_config.objective_thresholds is not None:  # pyre-ignore
        for objth in experiment.optimization_config.objective_thresholds:
            is_rel = objth.metric.name not in absolute_metrics
            if objth.relative != is_rel:
                raise ValueError(
                    f"Objective threshold for {objth.metric.name} has "
                    f"rel={objth.relative} but was specified here as rel={is_rel}"
                )
            objective_thresholds[objth.metric.name] = objth.bound

    # Construct ParetoFrontResults for each pair
    pfr_list = []
    param_dicts = [obs.features.parameters for obs in pareto_observations]
    arm_names = [obs.arm_name for obs in pareto_observations]

    for metric_a, metric_b in combinations(metric_names, 2):
        pfr_list.append(
            ParetoFrontierResults(
                param_dicts=param_dicts,
                means=pfr_means,
                sems=pfr_sems,
                primary_metric=metric_a,
                secondary_metric=metric_b,
                absolute_metrics=absolute_metrics,
                objective_thresholds=objective_thresholds,
                arm_names=arm_names,
            ))
    return pfr_list
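
An illustrative call of get_observed_pareto_frontiers, assuming the experiment has a multi-objective optimization config and attached data.

frontiers = get_observed_pareto_frontiers(experiment=experiment, rel=False)
for pfr in frontiers:
    print(pfr.primary_metric, pfr.secondary_metric, len(pfr.param_dicts))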
Example #22
def get_standard_plots(
    experiment: Experiment,
    model: Optional[ModelBridge],
    model_transitions: Optional[List[int]] = None,
) -> List[go.Figure]:
    """Extract standard plots for single-objective optimization.

    Extracts a list of plots from an ``Experiment`` and ``ModelBridge`` of general
    interest to an Ax user. Currently not supported are
    - TODO: multi-objective optimization
    - TODO: ChoiceParameter plots

    Args:
        - experiment: The ``Experiment`` from which to obtain standard plots.
        - model: The ``ModelBridge`` used to suggest trial parameters.
        - model_transitions: The arm numbers at which shifts in generation_strategy
            occur.

    Returns:
        - a plot of objective value vs. trial index, to show experiment progression
        - a plot of objective value vs. range parameter values, only included if the
          model associated with generation_strategy can create predictions. This
          consists of:

            - a plot_slice plot if the search space contains one range parameter
            - an interact_contour plot if the search space contains multiple
              range parameters

    """

    objective = not_none(experiment.optimization_config).objective
    if isinstance(objective, MultiObjective):
        logger.warning(
            "get_standard_plots does not currently support MultiObjective "
            "optimization experiments. Returning an empty list.")
        return []
    if isinstance(objective, ScalarizedObjective):
        logger.warning(
            "get_standard_plots does not currently support ScalarizedObjective "
            "optimization experiments. Returning an empty list.")
        return []

    if experiment.fetch_data().df.empty:
        logger.info(
            f"Experiment {experiment} does not yet have data, nothing to plot."
        )
        return []

    output_plot_list = []
    output_plot_list.append(
        _get_objective_trace_plot(
            experiment=experiment,
            metric_name=not_none(
                experiment.optimization_config).objective.metric.name,
            model_transitions=model_transitions
            if model_transitions is not None else [],
            optimization_direction=("minimize" if not_none(
                experiment.optimization_config).objective.minimize else
                                    "maximize"),
        ))

    # Objective vs. parameter plot requires a `Model`, so add it only if model
    # is already available. In cases where initially custom trials are attached,
    # model might not yet be set on the generation strategy.
    if model:
        # TODO: Check if model can predict in favor of try/catch.
        try:
            output_plot_list.append(
                _get_objective_v_param_plot(
                    search_space=experiment.search_space,
                    model=model,
                    metric_name=not_none(
                        experiment.optimization_config).objective.metric.name,
                    trials=experiment.trials,
                ))
            output_plot_list.append(_get_cross_validation_plot(model))
        except NotImplementedError:
            # Model does not implement `predict` method.
            pass

    return [plot for plot in output_plot_list if plot is not None]