示例#1
0
    def _num_remaining_trials_until_max_parallelism(
        self,
        raise_max_parallelism_reached_exception: bool = True,
    ) -> Optional[int]:
        """Compute how many more generator runs (one per future trial) may be
        produced before the current generation step hits its `max_parallelism`.

        Args:
            raise_max_parallelism_reached_exception: When true, raise
                ``MaxParallelismReachedException`` instead of returning if the
                number of trials running in this generation step is already at
                or above the step's maximum parallelism.

        Returns:
            ``None`` if the current step sets no `max_parallelism`; otherwise
            `max_parallelism` minus the number of trials currently running in
            this step. The result can be zero or negative when the limit is
            reached and raising was disabled.

        Raises:
            MaxParallelismReachedException: If the limit is reached or exceeded
                and ``raise_max_parallelism_reached_exception`` is true.
        """
        limit = self._curr.max_parallelism
        running = self.num_running_trials_this_step

        # No limit configured for this step: nothing to count down from.
        if limit is None:
            return None

        limit_hit = running >= limit
        if limit_hit and raise_max_parallelism_reached_exception:
            step = self._curr
            raise MaxParallelismReachedException(
                step_index=step.index,
                model_name=step.model_name,
                num_running=running,
            )

        remaining = limit - running
        return remaining
示例#2
0
    def gen(
        self,
        experiment: Experiment,
        data: Optional[Data] = None,
        n: int = 1,
        **kwargs: Any,
    ) -> GeneratorRun:
        """Produce the next points in the experiment.

        Extra keyword arguments given to this method are forwarded to the
        underlying model's `gen`, combined with the `model_gen_kwargs` set on
        the current generation step.

        Args:
            experiment: Experiment for which this generation strategy produces
                a new generator run, and to which that generator run will be
                added as trial(s). Information stored on the experiment (e.g.,
                trial statuses) is used to determine which model will produce
                the generator run returned from this method.
            data: Optional data used when setting or updating the underlying
                model before generation. By default, data is all data on the
                `experiment` if `use_update` is False and only the new data
                since the last call to this method if `use_update` is True.
            n: Integer representing how many arms should be in the generator
                run produced by this method. NOTE: Some underlying models may
                ignore `n` and produce a model-determined number of arms, in
                which case the returned generator run can have a number of
                arms that differs from `n`.
            **kwargs: Additional keyword arguments for the model's `gen`.

        Returns:
            The generator run produced by the current model, tagged with the
            index of the current generation step and recorded on this
            generation strategy.

        Raises:
            MaxParallelismReachedException: If the current step defines a
                `max_parallelism` and the number of running trials for the
                step is already at or above it.
        """
        self.experiment = experiment
        self._set_or_update_model(data=data)
        # Snapshot the trial statuses seen at the time of this call.
        self._seen_trial_indices_by_status = deepcopy(
            experiment.trial_indices_by_status
        )

        # Refuse to generate once the step's parallelism limit is reached.
        parallelism_limit = self._curr.max_parallelism
        running_count = self.num_running_trials_for_current_step
        if parallelism_limit is not None:
            if running_count >= parallelism_limit:
                raise MaxParallelismReachedException(
                    step_index=self._curr.index,
                    model_name=self._curr.model_name,
                    num_running=running_count,
                )

        fitted_model = not_none(self.model)
        # Merge step-level and call-level kwargs, keeping only the ones that
        # the model's `gen` actually accepts.
        gen_kwargs = consolidate_kwargs(
            kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
            keywords=get_function_argument_names(fitted_model.gen),
        )
        new_run = fitted_model.gen(n=n, **gen_kwargs)
        new_run._generation_step_index = self._curr.index
        self._generator_runs.append(new_run)
        return new_run
示例#3
0
    def _gen_multiple(
        self,
        experiment: Experiment,
        num_generator_runs: int,
        data: Optional[Data] = None,
        n: int = 1,
        **kwargs: Any,
    ) -> List[GeneratorRun]:
        """Produce multiple generator runs at once, to be made into multiple
        trials on the experiment.

        NOTE: This is used to ensure that maximum parallelism and number
        of trials per step are not violated when producing many generator
        runs from this generation strategy in a row. Without this function,
        if one generates multiple generator runs without first making any
        of them into running trials, generation strategy cannot enforce that it only
        produces as many generator runs as are allowed by the parallelism
        limit and the limit on number of trials in current step.

        Args:
            experiment: Experiment for which generator runs are produced; it
                is stored on this generation strategy as `self.experiment`.
            num_generator_runs: Requested number of generator runs. Fewer may
                be produced if the current step's `max_parallelism` or its
                enforced `num_trials` leaves less room, or if the model
                reports that it needs more data part-way through.
            data: Optional data used when setting or updating the underlying
                model before generation.
            n: Number of arms requested from the model for each generator run.
            **kwargs: Additional keyword arguments for the model's `gen`,
                combined with the current step's `model_gen_kwargs`.

        Returns:
            Clones of the generator runs that were produced; the originals are
            recorded on this generation strategy.

        Raises:
            MaxParallelismReachedException: If the current step defines a
                `max_parallelism` and the number of trials running in this
                step is already at or above it.
            DataRequiredError: If the model requires more data before even a
                single generator run could be produced.
        """
        self.experiment = experiment
        self._set_or_update_model(data=data)
        self._save_seen_trial_indices()
        max_parallelism = self._curr.max_parallelism
        num_running = self.num_running_trials_this_step

        # Make sure to not make too many generator runs and
        # exceed maximum allowed parallelism for the step.
        if max_parallelism is not None:
            if num_running >= max_parallelism:
                raise MaxParallelismReachedException(
                    step_index=self._curr.index,
                    model_name=self._curr.model_name,
                    num_running=num_running,
                )
            num_generator_runs = min(
                num_generator_runs, max_parallelism - num_running
            )

        # Make sure not to extend number of trials expected in step.
        if self._curr.enforce_num_trials and self._curr.num_trials > 0:
            num_generator_runs = min(
                num_generator_runs,
                self._curr.num_trials - self.num_can_complete_this_step,
            )

        model = not_none(self.model)
        # TODO[T79183560]: Cloning generator runs here is a temporary measure
        # to ensure a 1-to-1 correspondence between user-facing generator runs
        # and their stored SQL counterparts. This will be no longer needed soon
        # as we move to use foreign keys to avoid storing generator runs on both
        # experiment and generation strategy like we do now.
        generator_run_clones = []
        for _ in range(num_generator_runs):
            try:
                generator_run = model.gen(
                    n=n,
                    **consolidate_kwargs(
                        kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
                        keywords=get_function_argument_names(model.gen),
                    ),
                )
                generator_run._generation_step_index = self._curr.index
                self._generator_runs.append(generator_run)
                generator_run_clones.append(generator_run.clone())
            except DataRequiredError as err:
                # Model needs more data, so we log the error and return
                # as many generator runs as we were able to produce, unless
                # no trials were produced at all (in which case it's safe to raise).
                if len(generator_run_clones) == 0:
                    raise
                logger.debug(f"Model required more data: {err}.")
                # Stop here: without this, every remaining iteration would
                # call the model again only to fail and log again.
                break

        return generator_run_clones
示例#4
0
    def _gen_multiple(
        self,
        experiment: Experiment,
        num_generator_runs: int,
        data: Optional[Data] = None,
        n: int = 1,
        pending_observations: Optional[Dict[str, List[ObservationFeatures]]] = None,
        **kwargs: Any,
    ) -> List[GeneratorRun]:
        """Generate several generator runs in one call, each intended to
        become its own trial on the experiment.

        NOTE: Going through this method lets the generation strategy respect
        both the maximum parallelism and the number of trials allowed in the
        current step when many generator runs are requested in a row. If the
        runs were instead generated one by one without first being turned into
        running trials, the strategy could not cap their number at what those
        two limits allow.

        Args:
            experiment: Experiment for which this generation strategy produces
                new generator runs, and to which those generator runs will be
                added as trial(s). Information stored on the experiment (e.g.,
                trial statuses) is used to determine which model will produce
                the generator runs returned from this method.
            num_generator_runs: Requested number of generator runs; the number
                actually produced may be lower because of the limits above or
                because the model asks for more data part-way through.
            data: Optional data used when setting or updating the underlying
                model before generation. By default, data is all data on the
                `experiment` if `use_update` is False and only the new data
                since the last call to this method if `use_update` is True.
            n: Integer representing how many arms should be in each generator
                run. NOTE: Some underlying models may ignore `n` and produce a
                model-determined number of arms, in which case the resulting
                generator runs can have a number of arms that differs from `n`.
            pending_observations: A map from metric name to pending
                observations for that metric, used by some models to avoid
                resuggesting points that are currently being evaluated.
            **kwargs: Additional keyword arguments for the model's `gen`,
                combined with the current step's `model_gen_kwargs`.

        Returns:
            The generator runs that were produced, in generation order.

        Raises:
            MaxParallelismReachedException: If the current step defines a
                `max_parallelism` and the number of trials running in this
                step is already at or above it.
            DataRequiredError: If the model requires more data before even a
                single generator run could be produced.
        """
        self.experiment = experiment
        self._set_or_update_model(data=data)
        self._save_seen_trial_indices()

        parallelism_limit = self._curr.max_parallelism
        running = self.num_running_trials_this_step

        # Cap the request so the step's parallelism limit is never exceeded;
        # if the limit is already reached, nothing can be generated at all.
        if parallelism_limit is not None:
            if running >= parallelism_limit:
                raise MaxParallelismReachedException(
                    step_index=self._curr.index,
                    model_name=self._curr.model_name,
                    num_running=running,
                )
            num_generator_runs = min(num_generator_runs, parallelism_limit - running)

        # Cap the request so the step does not grow past its expected size.
        if self._curr.enforce_num_trials and self._curr.num_trials > 0:
            num_generator_runs = min(
                num_generator_runs,
                self._curr.num_trials - self.num_can_complete_this_step,
            )

        fitted_model = not_none(self.model)
        produced = []
        for _ in range(num_generator_runs):
            try:
                run = fitted_model.gen(
                    n=n,
                    pending_observations=pending_observations,
                    **consolidate_kwargs(
                        kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
                        keywords=get_function_argument_names(fitted_model.gen),
                    ),
                )
            except DataRequiredError as err:
                # The model wants more data. With nothing produced yet there is
                # no partial result to hand back, so propagate the error;
                # otherwise log it and return what was produced so far.
                if not produced:
                    raise
                logger.debug(f"Model required more data: {err}.")
                break
            else:
                run._generation_step_index = self._curr.index
                self._generator_runs.append(run)
                produced.append(run)

        return produced