Example #1
    def __call__(
        self,
        search_space: Optional[SearchSpace] = None,
        experiment: Optional[Experiment] = None,
        data: Optional[Data] = None,
        silently_filter_kwargs: bool = True,  # TODO[Lena]: default to False
        **kwargs: Any,
    ) -> ModelBridge:
        assert self.value in MODEL_KEY_TO_MODEL_SETUP, f"Unknown model {self.value}"
        # All model bridges require either a search space or an experiment.
        assert search_space or experiment, "Search space or experiment required."
        model_setup_info = MODEL_KEY_TO_MODEL_SETUP[self.value]
        model_class = model_setup_info.model_class
        bridge_class = model_setup_info.bridge_class
        if not silently_filter_kwargs:
            validate_kwarg_typing(  # TODO[Lena]: T46467254, pragma: no cover
                typed_callables=[model_class, bridge_class],
                search_space=search_space,
                experiment=experiment,
                data=data,
                **kwargs,
            )

        # Create model with consolidated arguments: defaults + passed in kwargs.
        model_kwargs = consolidate_kwargs(
            kwargs_iterable=[get_function_default_arguments(model_class), kwargs],
            keywords=get_function_argument_names(model_class),
        )
        model = model_class(**model_kwargs)

        # Create `ModelBridge`: defaults + standard kwargs + passed in kwargs.
        bridge_kwargs = consolidate_kwargs(
            kwargs_iterable=[
                get_function_default_arguments(bridge_class),
                model_setup_info.standard_bridge_kwargs,
                {"transforms": model_setup_info.transforms},
                kwargs,
            ],
            keywords=get_function_argument_names(
                function=bridge_class, omit=["experiment", "search_space", "data"]
            ),
        )

        # Create model bridge with the consolidated kwargs.
        model_bridge = bridge_class(
            search_space=search_space or not_none(experiment).search_space,
            experiment=experiment,
            data=data,
            model=model,
            **bridge_kwargs,
        )

        # Store all kwargs on model bridge, to be saved on generator run.
        model_bridge._set_kwargs_to_save(
            model_key=self.value,
            model_kwargs=_encode_callables_as_references(model_kwargs),
            bridge_kwargs=_encode_callables_as_references(bridge_kwargs),
        )
        return model_bridge
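None of these examples include the body of consolidate_kwargs itself. A minimal sketch of what such a helper might do, assuming later dicts in kwargs_iterable override earlier ones and only keys listed in keywords are kept (the real Ax implementation may differ):

from typing import Any, Dict, Iterable, List, Optional


def consolidate_kwargs_sketch(
    kwargs_iterable: Iterable[Optional[Dict[str, Any]]], keywords: List[str]
) -> Dict[str, Any]:
    """Hypothetical re-implementation, for illustration only."""
    consolidated: Dict[str, Any] = {}
    for kwargs in kwargs_iterable:
        if not kwargs:
            continue
        # Later dicts win; keys not in `keywords` are silently dropped.
        consolidated.update({k: v for k, v in kwargs.items() if k in keywords})
    return consolidated


# Defaults overridden by passed-in kwargs, with an unaccepted key filtered out:
defaults = {"num_samples": 128}
passed = {"num_samples": 256, "unknown_kwarg": 1}
assert consolidate_kwargs_sketch([defaults, passed], keywords=["num_samples"]) == {
    "num_samples": 256
}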
Example #2
def _get_model_kwargs(
    info: ModelSetup, kwargs: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    return consolidate_kwargs(
        [get_function_default_arguments(info.model_class), kwargs],
        keywords=get_function_argument_names(info.model_class),
    )
Example #3
def gen(
    self,
    experiment: Experiment,
    data: Optional[Data] = None,
    n: int = 1,
    **kwargs: Any,
) -> GeneratorRun:
    """Produce the next points in the experiment."""
    self.experiment = experiment
    self._set_model(experiment=experiment, data=data or experiment.fetch_data())
    max_parallelism = self._curr.max_parallelism
    num_running = self.num_running_trials_for_current_step
    if max_parallelism is not None and num_running >= max_parallelism:
        raise MaxParallelismReachedException(
            step=self._curr, num_running=num_running
        )
    model = not_none(self.model)
    generator_run = model.gen(
        n=n,
        **consolidate_kwargs(
            kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
            keywords=get_function_argument_names(model.gen),
        ),
    )
    generator_run._generation_step_index = self._curr.index
    self._generator_runs.append(generator_run)
    return generator_run
Example #4
    def gen(
        self,
        experiment: Experiment,
        new_data: Optional[Data] = None,  # Take in just the new data.
        n: int = 1,
        **kwargs: Any,
    ) -> GeneratorRun:
        """Produce the next points in the experiment."""
        self._set_experiment(experiment=experiment)

        # Get arm signatures for each entry in new_data that is indeed new.
        new_arms = self._get_new_arm_signatures(experiment=experiment,
                                                new_data=new_data)
        enough_observed = (len(self._observed) +
                           len(new_arms)) >= self._curr.min_arms_observed
        unlimited_arms = self._curr.num_arms == -1
        enough_generated = (not unlimited_arms
                            and len(self._generated) >= self._curr.num_arms)
        remaining_arms = self._curr.num_arms - len(self._generated)

        # Check that the minimum number of observed arms is satisfied, if enforced.
        if self._curr.enforce_num_arms and enough_generated and not enough_observed:
            raise ValueError(
                "All trials for current model have been generated, but not enough "
                "data has been observed to fit next model. Try again when more data "
                "are available.")
            # TODO[Lena, T44021164]: take into account failed trials. Potentially
            # reduce `_generated` count when a trial mentioned in new data failed.
        if (self._curr.enforce_num_arms and not unlimited_arms
                and 0 < remaining_arms < n):
            raise ValueError(
                f"Cannot generate {n} new arms as there are only {remaining_arms} "
                "remaining arms to generate using the current model.")

        all_data = (Data.from_multiple_data(
            data=[self._data, new_data]) if new_data else self._data)

        if self._model is None:
            # Instantiate the first model.
            self._set_current_model(experiment=experiment, data=all_data)
        elif enough_generated and enough_observed:
            # Change to the next model.
            self._change_model(experiment=experiment, data=all_data)
        elif new_data is not None:
            # We're sticking with the curr. model, but should update with new data.
            # pyre-fixme[16]: `Optional` has no attribute `update`.
            self._model.update(experiment=experiment, data=new_data)

        kwargs = consolidate_kwargs(
            kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
            keywords=get_function_argument_names(not_none(self._model).gen),
        )
        gen_run = not_none(self._model).gen(n=n, **kwargs)

        # If nothing failed, update known data, _generated, and _observed.
        self._data = all_data
        self._generated.extend([arm.signature for arm in gen_run.arms])
        self._observed.extend(new_arms)
        self._generator_runs.append(gen_run)
        return gen_run
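The transition logic above switches to the next model only once the current step has both generated its full quota of arms and observed enough of them. A small sketch of those conditions with hypothetical counts:

# Hypothetical counts; a num_arms of -1 would mean the step is unlimited.
num_arms_in_step = 5
min_arms_observed = 3
num_generated = 5
num_observed = 2
num_newly_observed = 1

unlimited_arms = num_arms_in_step == -1
enough_generated = not unlimited_arms and num_generated >= num_arms_in_step
enough_observed = (num_observed + num_newly_observed) >= min_arms_observed

# Only when both hold does gen() move on to the next model in the strategy.
assert enough_generated and enough_observed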
Example #5
    def gen(self, **model_gen_kwargs: Any) -> GeneratorRun:
        """Generates candidates from the fitted model, using the model gen
        kwargs set on the model spec, alongside any passed as kwargs
        to this function (local kwargs take precedence).

        NOTE: Model must have been fit prior to calling gen()

        Args:
            n: Integer representing how many arms should be in the generator run
                produced by this method. NOTE: Some underlying models may ignore
                the ``n`` and produce a model-determined number of arms. In that
                case this method will also output a generator run with number of
                arms that can differ from ``n``.
            pending_observations: A map from metric name to pending
                observations for that metric, used by some models to avoid
                resuggesting points that are currently being evaluated.
        """
        fitted_model = self.fitted_model
        model_gen_kwargs = consolidate_kwargs(
            kwargs_iterable=[
                self.model_gen_kwargs,
                model_gen_kwargs,
            ],
            keywords=get_function_argument_names(fitted_model.gen),
        )
        return fitted_model.gen(**model_gen_kwargs)
Example #6
    def gen(
        self,
        experiment: Experiment,
        data: Optional[Data] = None,
        n: int = 1,
        **kwargs: Any,
    ) -> GeneratorRun:
        """Produce the next points in the experiment. Additional kwargs passed to
        this method are propagated directly to the underlying model's `gen`, along
        with the `model_gen_kwargs` set on the current generation step.

        Args:
            experiment: Experiment, for which the generation strategy is producing
                a new generator run in the course of `gen`, and to which that
                generator run will be added as trial(s). Information stored on the
                experiment (e.g., trial statuses) is used to determine which model
                will be used to produce the generator run returned from this method.
            data: Optional data to be passed to the underlying model's `gen`, which
                is called within this method and actually produces the resulting
                generator run. By default, data is all data on the `experiment` if
                `use_update` is False and only the new data since the last call to
                this method if `use_update` is True.
            n: Integer representing how many arms should be in the generator run
                produced by this method. NOTE: Some underlying models may ignore
                the `n` and produce a model-determined number of arms. In that
                case this method will also output a generator run with number of
                arms that can differ from `n`.
        """
        self.experiment = experiment
        self._set_or_update_model(data=data)
        self._seen_trial_indices_by_status = deepcopy(
            experiment.trial_indices_by_status
        )
        max_parallelism = self._curr.max_parallelism
        num_running = self.num_running_trials_for_current_step
        if max_parallelism is not None and num_running >= max_parallelism:
            raise MaxParallelismReachedException(
                step_index=self._curr.index,
                model_name=self._curr.model_name,
                num_running=num_running,
            )
        model = not_none(self.model)
        generator_run = model.gen(
            n=n,
            **consolidate_kwargs(
                kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
                keywords=get_function_argument_names(model.gen),
            ),
        )
        generator_run._generation_step_index = self._curr.index
        self._generator_runs.append(generator_run)
        return generator_run
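The max_parallelism guard above refuses to generate once the number of running trials attributed to the current step reaches the configured limit. A minimal stand-alone sketch of that check (using a plain RuntimeError in place of Ax's MaxParallelismReachedException):

from typing import Optional


def check_parallelism(num_running: int, max_parallelism: Optional[int]) -> None:
    # Sketch of the guard in gen(): a limit of None means unlimited parallelism.
    if max_parallelism is not None and num_running >= max_parallelism:
        raise RuntimeError(  # Ax raises MaxParallelismReachedException here.
            f"{num_running} trials are already running; the limit for this "
            f"step is {max_parallelism}."
        )


check_parallelism(num_running=2, max_parallelism=3)     # fine, under the limit
check_parallelism(num_running=5, max_parallelism=None)  # fine, no limit set
# check_parallelism(num_running=3, max_parallelism=3)   # would raise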
Example #7
def _get_bridge_kwargs(
    info: ModelSetup, kwargs: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    return consolidate_kwargs(
        [
            get_function_default_arguments(info.bridge_class),
            info.standard_bridge_kwargs,
            {"transforms": info.transforms},
            kwargs,
        ],
        keywords=get_function_argument_names(
            info.bridge_class, omit=["experiment", "search_space", "data"]
        ),
    )
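Together with _get_model_kwargs from Example #2, this helper could be wrapped roughly as follows to mirror the flow of Models.__call__ in Examples #1 and #8. This is a sketch only; the wrapper name and exact call pattern are illustrative, not part of Ax:

def build_model_and_bridge(info, search_space, experiment=None, data=None, **kwargs):
    # Hypothetical convenience wrapper: consolidate kwargs for the model and the
    # bridge separately, then instantiate both, as Models.__call__ does above.
    model = info.model_class(**_get_model_kwargs(info, kwargs=kwargs))
    bridge_kwargs = _get_bridge_kwargs(info, kwargs=kwargs)
    return info.bridge_class(
        search_space=search_space,
        experiment=experiment,
        data=data,
        model=model,
        **bridge_kwargs,
    )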
Example #8
    def __call__(
        self,
        search_space: Optional[SearchSpace] = None,
        experiment: Optional[Experiment] = None,
        data: Optional[Data] = None,
        silently_filter_kwargs: bool = True,  # TODO[Lena]: default to False
        **kwargs: Any,
    ) -> ModelBridge:
        assert self.value in MODEL_KEY_TO_MODEL_SETUP
        # All model bridges require either a search space or an experiment.
        assert search_space or experiment, "Search space or experiment required."
        model_setup_info = MODEL_KEY_TO_MODEL_SETUP[self.value]
        model_class = model_setup_info.model_class
        bridge_class = model_setup_info.bridge_class
        if not silently_filter_kwargs:
            validate_kwarg_typing(  # TODO[Lena]: T46467254, pragma: no cover
                typed_callables=[model_class, bridge_class],
                search_space=search_space,
                experiment=experiment,
                data=data,
                **kwargs,
            )

        # Create model with consolidated arguments: defaults + passed in kwargs.
        model_kwargs = consolidate_kwargs(
            kwargs_iterable=[get_function_default_arguments(model_class), kwargs],
            keywords=get_function_argument_names(model_class),
        )
        model = model_class(**model_kwargs)

        # Create `ModelBridge`: defaults + standard kwargs + passed in kwargs.
        bridge_kwargs = consolidate_kwargs(
            kwargs_iterable=[
                get_function_default_arguments(bridge_class),
                model_setup_info.standard_bridge_kwargs,
                {"transforms": model_setup_info.transforms},
                kwargs,
            ],
            keywords=get_function_argument_names(
                function=bridge_class, omit=["experiment", "search_space", "data"]
            ),
        )

        # Create model bridge with the consolidated kwargs.
        model_bridge = bridge_class(
            search_space=search_space or not_none(experiment).search_space,
            experiment=experiment,
            data=data,
            model=model,
            **bridge_kwargs,
        )

        # Temporarily ignore Botorch callable & torch-typed arguments, as those
        # are not serializable to JSON out-of-the-box. TODO[Lena]: T46527142
        if isinstance(model, TorchModel):
            model_kwargs = {kw: p for kw, p in model_kwargs.items() if not callable(p)}
            bridge_kwargs = {
                kw: p for kw, p in bridge_kwargs.items() if kw[:5] != "torch"
            }

        # Store all kwargs on model bridge, to be saved on generator run.
        model_bridge._set_kwargs_to_save(
            model_key=self.value, model_kwargs=model_kwargs, bridge_kwargs=bridge_kwargs
        )
        return model_bridge
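The filtering at the end of this variant drops callables and torch-prefixed kwargs because they are not JSON-serializable out of the box; Example #1 instead encodes callables as references. A small demonstration of the same dict comprehensions on made-up kwargs:

# Made-up kwargs for illustration; not real Ax model or bridge arguments.
model_kwargs = {"num_samples": 256, "acqf_constructor": lambda *args: None}
bridge_kwargs = {"torch_dtype": "float64", "transforms": []}

# Drop callables from model kwargs and torch-prefixed keys from bridge kwargs,
# as in the TorchModel branch above (kw[:5] != "torch" is equivalent to this).
model_kwargs = {kw: p for kw, p in model_kwargs.items() if not callable(p)}
bridge_kwargs = {kw: p for kw, p in bridge_kwargs.items() if not kw.startswith("torch")}

assert model_kwargs == {"num_samples": 256}
assert bridge_kwargs == {"transforms": []}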
Example #9
    def _gen_multiple(
        self,
        experiment: Experiment,
        num_generator_runs: int,
        data: Optional[Data] = None,
        n: int = 1,
        pending_observations: Optional[Dict[str, List[ObservationFeatures]]] = None,
        **kwargs: Any,
    ) -> List[GeneratorRun]:
        """Produce multiple generator runs at once, to be made into multiple
        trials on the experiment.

        NOTE: This is used to ensure that the maximum parallelism and the number
        of trials per step are not violated when producing many generator runs
        from this generation strategy in a row. Without this function, if one
        generates multiple generator runs without first making any of them into
        running trials, the generation strategy cannot enforce that it only
        produces as many generator runs as are allowed by the parallelism limit
        and by the limit on the number of trials in the current step.

        Args:
            experiment: Experiment, for which the generation strategy is producing
                a new generator run in the course of `gen`, and to which that
                generator run will be added as trial(s). Information stored on the
                experiment (e.g., trial statuses) is used to determine which model
                will be used to produce the generator run returned from this method.
            data: Optional data to be passed to the underlying model's `gen`, which
                is called within this method and actually produces the resulting
                generator run. By default, data is all data on the `experiment` if
                `use_update` is False and only the new data since the last call to
                this method if `use_update` is True.
            n: Integer representing how many arms should be in the generator run
                produced by this method. NOTE: Some underlying models may ignore
                the `n` and produce a model-determined number of arms. In that
                case this method will also output a generator run with number of
                arms that can differ from `n`.
            pending_observations: A map from metric name to pending
                observations for that metric, used by some models to avoid
                resuggesting points that are currently being evaluated.
        """
        self.experiment = experiment
        self._maybe_move_to_next_step()
        self._set_or_update_current_model(data=data)
        self._save_seen_trial_indices()

        # Make sure not to make too many generator runs and
        # exceed the maximum allowed parallelism for the step.
        num_until_max_parallelism = self._num_remaining_trials_until_max_parallelism()
        if num_until_max_parallelism is not None:
            num_generator_runs = min(num_generator_runs,
                                     num_until_max_parallelism)

        # Make sure not to extend number of trials expected in step.
        if self._curr.enforce_num_trials and self._curr.num_trials > 0:
            num_generator_runs = min(
                num_generator_runs,
                self._curr.num_trials - self.num_can_complete_this_step,
            )

        model = not_none(self.model)
        model_gen_kwargs = consolidate_kwargs(
            kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
            keywords=get_function_argument_names(model.gen),
        )
        generator_runs = []
        for _ in range(num_generator_runs):
            try:
                generator_run = _produce_generator_run_from_model(
                    model=model,
                    input_max_gen_draws=MAX_GEN_DRAWS,
                    n=n,
                    pending_observations=pending_observations,
                    model_gen_kwargs=model_gen_kwargs,
                    should_deduplicate=self._curr.should_deduplicate,
                    arms_by_signature=self.experiment.arms_by_signature,
                )
                generator_run._generation_step_index = self._curr.index
                self._generator_runs.append(generator_run)
                generator_runs.append(generator_run)
            except DataRequiredError as err:
                # Model needs more data, so we log the error and return
                # as many generator runs as we were able to produce, unless
                # no trials were produced at all (in which case it's safe to raise).
                if len(generator_runs) == 0:
                    raise
                logger.debug(f"Model required more data: {err}.")
                break

        return generator_runs
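The helper _produce_generator_run_from_model is not shown in these examples. Judging only from its parameters, it plausibly re-draws from the model until the proposed arms are not already on the experiment, bounded by MAX_GEN_DRAWS. A hedged reconstruction (the name, structure, and raised error type are assumptions):

def produce_generator_run_sketch(
    model, n, pending_observations, model_gen_kwargs, should_deduplicate,
    arms_by_signature, max_gen_draws,
):
    # Hypothetical reconstruction: retry generation until none of the proposed
    # arms duplicates an existing arm signature, or the draw budget runs out.
    for _ in range(max_gen_draws):
        generator_run = model.gen(
            n=n, pending_observations=pending_observations, **model_gen_kwargs
        )
        if not should_deduplicate:
            return generator_run
        if all(arm.signature not in arms_by_signature for arm in generator_run.arms):
            return generator_run
    raise RuntimeError(f"Could not produce non-duplicate arms in {max_gen_draws} draws.")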
Example #10
    def _gen_multiple(
        self,
        experiment: Experiment,
        num_generator_runs: int,
        data: Optional[Data] = None,
        n: int = 1,
        **kwargs: Any,
    ) -> List[GeneratorRun]:
        """Produce multiple generator runs at once, to be made into multiple
        trials on the experiment.

        NOTE: This is used to ensure that the maximum parallelism and the number
        of trials per step are not violated when producing many generator runs
        from this generation strategy in a row. Without this function, if one
        generates multiple generator runs without first making any of them into
        running trials, the generation strategy cannot enforce that it only
        produces as many generator runs as are allowed by the parallelism limit
        and by the limit on the number of trials in the current step.
        """
        self.experiment = experiment
        self._set_or_update_model(data=data)
        self._save_seen_trial_indices()
        max_parallelism = self._curr.max_parallelism
        num_running = self.num_running_trials_this_step

        # Make sure not to make too many generator runs and
        # exceed the maximum allowed parallelism for the step.
        if max_parallelism is not None:
            if num_running >= max_parallelism:
                raise MaxParallelismReachedException(
                    step_index=self._curr.index,
                    model_name=self._curr.model_name,
                    num_running=num_running,
                )
            else:
                num_generator_runs = min(num_generator_runs,
                                         max_parallelism - num_running)

        # Make sure not to extend number of trials expected in step.
        if self._curr.enforce_num_trials and self._curr.num_trials > 0:
            num_generator_runs = min(
                num_generator_runs,
                self._curr.num_trials - self.num_can_complete_this_step,
            )

        model = not_none(self.model)
        # TODO[T79183560]: Cloning generator runs here is a temporary measure
        # to ensure a 1-to-1 correspondence between user-facing generator runs
        # and their stored SQL counterparts. This will no longer be needed once
        # we move to using foreign keys to avoid storing generator runs on both
        # the experiment and the generation strategy, as we do now.
        generator_run_clones = []
        for _ in range(num_generator_runs):
            try:
                generator_run = model.gen(
                    n=n,
                    **consolidate_kwargs(
                        kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
                        keywords=get_function_argument_names(model.gen),
                    ),
                )
                generator_run._generation_step_index = self._curr.index
                self._generator_runs.append(generator_run)
                generator_run_clones.append(generator_run.clone())
            except DataRequiredError as err:
                # Model needs more data, so we log the error and return
                # as many generator runs as we were able to produce, unless
                # no trials were produced at all (in which case it's safe to raise).
                if len(generator_run_clones) == 0:
                    raise
                logger.debug(f"Model required more data: {err}.")
                break

        return generator_run_clones
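Instead of raising immediately once the limit is hit, this variant also caps the number of generator runs it will produce. The two min() reductions above can be read as simple arithmetic, illustrated here with hypothetical numbers:

# Hypothetical numbers for the current step.
num_generator_runs = 10          # requested
max_parallelism, num_running = 5, 2
num_trials_in_step, num_can_complete = 8, 6

# Cap by the remaining parallelism headroom for the step.
num_generator_runs = min(num_generator_runs, max_parallelism - num_running)          # -> 3
# Cap by how many trials the step is still allowed to add.
num_generator_runs = min(num_generator_runs, num_trials_in_step - num_can_complete)  # -> 2

assert num_generator_runs == 2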
Example #11
    def gen(
        self,
        experiment: Experiment,
        data: Optional[Data] = None,
        n: int = 1,
        **kwargs: Any,
    ) -> GeneratorRun:
        """Produce the next points in the experiment."""
        self._set_experiment(experiment=experiment)
        new_arm_signatures = set()
        data = data or experiment.fetch_data()
        if data is not None and not data.df.empty:
            if self._data.df.empty:
                new_data = data.df
            else:
                # Select only the new data to determine how many new arms were
                # evaluated since the generation strategy was last updated with
                # data (find rows that are in `data.df`, but not in `self._data.df`)
                merged = data.df.merge(
                    self._data.df,
                    on=[
                        "arm_name", "trial_index", "metric_name", "mean", "sem"
                    ],
                    how="left",
                    indicator=True,
                )
                new_data = merged[merged["_merge"] == "left_only"]
            # Get arm signatures for each entry in data that the GS hasn't seen yet.
            new_arm_signatures = {
                not_none(experiment.arms_by_name.get(
                    row["arm_name"])).signature
                for _, row in new_data.iterrows()
                if (row["arm_name"] in experiment.arms_by_name
                    and not not_none(experiment.trials.get(
                        row["trial_index"])).status.is_failed)
            }

        enough_observed = (len(self._observed) + len(new_arm_signatures)
                           ) >= self._curr.min_arms_observed
        unlimited_arms = self._curr.num_arms == -1
        enough_generated = (not unlimited_arms
                            and len(self._generated) >= self._curr.num_arms)

        # Check that the minimum number of observed arms is satisfied, if enforced.
        if self._curr.enforce_num_arms and enough_generated and not enough_observed:
            raise DataRequiredError(
                "All trials for current model have been generated, but not enough "
                "data has been observed to fit next model. Try again when more data "
                "are available.")
            # TODO[Lena, T44021164]: take into account failed trials. Potentially
            # reduce `_generated` count when a trial mentioned in new data failed.

        lgr = self.last_generator_run

        if enough_generated and enough_observed:
            # Change to the next model.
            self._change_model(experiment=experiment, data=data)
        elif lgr is not None and lgr._model_state_after_gen is not None:
            model_state = not_none(lgr._model_state_after_gen)
            self._set_current_model(experiment=experiment,
                                    data=data,
                                    **model_state)
        else:
            self._set_current_model(experiment=experiment, data=data)

        model = not_none(self._model)
        kwargs = consolidate_kwargs(
            kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
            keywords=get_function_argument_names(not_none(self._model).gen),
        )
        gen_run = model.gen(n=n, **kwargs)

        # If nothing failed, update known data, _generated, and _observed.
        self._data = data
        self._generated.extend([arm.signature for arm in gen_run.arms])
        self._observed.extend(new_arm_signatures)
        self._generator_runs.append(gen_run)
        return gen_run
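The merge with indicator=True used above is a standard pandas idiom for finding rows present in the new data but not in the data the strategy has already seen. A self-contained illustration with made-up observation rows:

import pandas as pd

# Made-up observation rows; the columns mirror those used in the merge above.
old_df = pd.DataFrame(
    {"arm_name": ["0_0"], "trial_index": [0], "metric_name": ["m"],
     "mean": [1.0], "sem": [0.1]}
)
new_df = pd.DataFrame(
    {"arm_name": ["0_0", "1_0"], "trial_index": [0, 1], "metric_name": ["m", "m"],
     "mean": [1.0, 2.0], "sem": [0.1, 0.2]}
)

merged = new_df.merge(
    old_df,
    on=["arm_name", "trial_index", "metric_name", "mean", "sem"],
    how="left",
    indicator=True,
)
# Rows marked "left_only" exist only in the new data; here that is arm "1_0".
new_rows = merged[merged["_merge"] == "left_only"]
assert list(new_rows["arm_name"]) == ["1_0"]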
Example #12
    def _gen_multiple(
        self,
        experiment: Experiment,
        num_generator_runs: int,
        data: Optional[Data] = None,
        n: int = 1,
        pending_observations: Optional[Dict[str, List[ObservationFeatures]]] = None,
        **kwargs: Any,
    ) -> List[GeneratorRun]:
        """Produce multiple generator runs at once, to be made into multiple
        trials on the experiment.

        NOTE: This is used to ensure that the maximum parallelism and the number
        of trials per step are not violated when producing many generator runs
        from this generation strategy in a row. Without this function, if one
        generates multiple generator runs without first making any of them into
        running trials, the generation strategy cannot enforce that it only
        produces as many generator runs as are allowed by the parallelism limit
        and by the limit on the number of trials in the current step.

        Args:
            experiment: Experiment, for which the generation strategy is producing
                a new generator run in the course of `gen`, and to which that
                generator run will be added as trial(s). Information stored on the
                experiment (e.g., trial statuses) is used to determine which model
                will be used to produce the generator run returned from this method.
            data: Optional data to be passed to the underlying model's `gen`, which
                is called within this method and actually produces the resulting
                generator run. By default, data is all data on the `experiment` if
                `use_update` is False and only the new data since the last call to
                this method if `use_update` is True.
            n: Integer representing how many arms should be in the generator run
                produced by this method. NOTE: Some underlying models may ignore
                the `n` and produce a model-determined number of arms. In that
                case this method will also output a generator run with number of
                arms that can differ from `n`.
            pending_observations: A map from metric name to pending
                observations for that metric, used by some models to avoid
                resuggesting points that are currently being evaluated.
        """
        self.experiment = experiment
        self._set_or_update_model(data=data)
        self._save_seen_trial_indices()
        max_parallelism = self._curr.max_parallelism
        num_running = self.num_running_trials_this_step

        # Make sure not to make too many generator runs and
        # exceed the maximum allowed parallelism for the step.
        if max_parallelism is not None:
            if num_running >= max_parallelism:
                raise MaxParallelismReachedException(
                    step_index=self._curr.index,
                    model_name=self._curr.model_name,
                    num_running=num_running,
                )
            else:
                num_generator_runs = min(num_generator_runs,
                                         max_parallelism - num_running)

        # Make sure not to extend number of trials expected in step.
        if self._curr.enforce_num_trials and self._curr.num_trials > 0:
            num_generator_runs = min(
                num_generator_runs,
                self._curr.num_trials - self.num_can_complete_this_step,
            )

        model = not_none(self.model)
        # TODO[T79183560]: Cloning generator runs here is a temporary measure
        # to ensure a 1-to-1 correspondence between user-facing generator runs
        # and their stored SQL counterparts. This will no longer be needed once
        # we move to using foreign keys to avoid storing generator runs on both
        # the experiment and the generation strategy, as we do now.
        generator_run_clones = []
        for _ in range(num_generator_runs):
            try:
                generator_run = model.gen(
                    n=n,
                    pending_observations=pending_observations,
                    **consolidate_kwargs(
                        kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
                        keywords=get_function_argument_names(model.gen),
                    ),
                )
                generator_run._generation_step_index = self._curr.index
                self._generator_runs.append(generator_run)
                generator_run_clones.append(generator_run.clone())
            except DataRequiredError as err:
                # Model needs more data, so we log the error and return
                # as many generator runs as we were able to produce, unless
                # no trials were produced at all (in which case it's safe to raise).
                if len(generator_run_clones) == 0:
                    raise
                logger.debug(f"Model required more data: {err}.")
                break

        return generator_run_clones