def _num_remaining_trials_until_max_parallelism(
    self, raise_max_parallelism_reached_exception: bool = True
) -> Optional[int]:
    """Compute how many more generator runs (one per future trial) may be
    produced before the current generation step hits its `max_parallelism`.

    Args:
        raise_max_parallelism_reached_exception: When True (default), raise
            ``MaxParallelismReachedException`` if the number of trials
            running in this generation step has reached or exceeded the
            step's maximum parallelism.

    Returns:
        ``None`` if the current step has no `max_parallelism` limit;
        otherwise `max_parallelism` minus the number of running trials in
        this step. With the exception disabled, the result can be zero or
        negative.

    Raises:
        MaxParallelismReachedException: If the limit is reached and
            ``raise_max_parallelism_reached_exception`` is True.
    """
    step = self._curr
    limit = step.max_parallelism
    running = self.num_running_trials_this_step

    # No limit configured on this step: nothing to count down from.
    if limit is None:
        return None

    # Either the caller opted out of the exception or there is still room.
    if not raise_max_parallelism_reached_exception or running < limit:
        return limit - running

    raise MaxParallelismReachedException(
        step_index=step.index,
        model_name=step.model_name,
        num_running=running,
    )
def gen(
    self,
    experiment: Experiment,
    data: Optional[Data] = None,
    n: int = 1,
    **kwargs: Any,
) -> GeneratorRun:
    """Generate the next generator run for the given experiment.

    Extra keyword arguments given to this method are forwarded to the
    underlying model's `gen`, together with the `model_gen_kwargs`
    configured on the current generation step.

    Args:
        experiment: Experiment for which this generation strategy produces
            a new generator run, and to which that generator run will be
            added as trial(s). Information stored on the experiment (e.g.,
            trial statuses) is used to determine which model produces the
            generator run returned from this method.
        data: Optional data to be passed to the underlying model's `gen`,
            which is called within this method and actually produces the
            resulting generator run. By default, data is all data on the
            `experiment` if `use_update` is False and only the new data
            since the last call to this method if `use_update` is True.
        n: Number of arms requested in the generator run. NOTE: some
            underlying models may ignore `n` and produce a model-determined
            number of arms, in which case the returned generator run can
            have a number of arms different from `n`.

    Returns:
        The generator run produced by the current step's model; it is also
        recorded on this generation strategy.

    Raises:
        MaxParallelismReachedException: If the current step has a
            `max_parallelism` limit and the number of running trials for
            the step has reached or exceeded it.
    """
    self.experiment = experiment
    self._set_or_update_model(data=data)
    # Keep an independent snapshot of the experiment's trial indices by status.
    self._seen_trial_indices_by_status = deepcopy(
        experiment.trial_indices_by_status
    )

    parallelism_cap = self._curr.max_parallelism
    running_count = self.num_running_trials_for_current_step
    cap_reached = parallelism_cap is not None and running_count >= parallelism_cap
    if cap_reached:
        raise MaxParallelismReachedException(
            step_index=self._curr.index,
            model_name=self._curr.model_name,
            num_running=running_count,
        )

    fitted_model = not_none(self.model)
    # Merge step-level and call-level kwargs, restricted to the argument
    # names that the model's `gen` declares.
    gen_kwargs = consolidate_kwargs(
        kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
        keywords=get_function_argument_names(fitted_model.gen),
    )
    new_run = fitted_model.gen(n=n, **gen_kwargs)
    # Tag the run with the step that produced it, then record it.
    new_run._generation_step_index = self._curr.index
    self._generator_runs.append(new_run)
    return new_run
def _gen_multiple(
    self,
    experiment: Experiment,
    num_generator_runs: int,
    data: Optional[Data] = None,
    n: int = 1,
    **kwargs: Any,
) -> List[GeneratorRun]:
    """Produce multiple generator runs at once, to be made into multiple
    trials on the experiment.

    NOTE: This is used to ensure that maximum parallelism and number of
    trials per step are not violated when producing many generator runs
    from this generation strategy in a row. Without this function, if one
    generates multiple generator runs without first making any of them into
    running trials, generation strategy cannot enforce that it only produces
    as many generator runs as are allowed by the parallelism limit and the
    limit on number of trials in current step.

    Args:
        experiment: Experiment for which generator runs are produced; it is
            stored on this generation strategy.
        num_generator_runs: Requested number of generator runs. The number
            actually produced may be lower: it is capped by the remaining
            parallelism of the current step and, when the step enforces its
            number of trials, by the number of trials left in the step.
        data: Optional data forwarded to ``_set_or_update_model``.
        n: Number of arms requested from the model for each generator run.
        **kwargs: Extra keyword arguments forwarded to the model's `gen`,
            along with the current step's `model_gen_kwargs`.

    Returns:
        Clones of the generator runs that were produced (the originals are
        recorded on this generation strategy). May contain fewer than
        ``num_generator_runs`` items.

    Raises:
        MaxParallelismReachedException: If the current step has a
            `max_parallelism` limit that is already reached.
        DataRequiredError: If the model requires more data before even one
            generator run could be produced.
    """
    self.experiment = experiment
    self._set_or_update_model(data=data)
    self._save_seen_trial_indices()
    max_parallelism = self._curr.max_parallelism
    num_running = self.num_running_trials_this_step

    # Make sure to not make too many generator runs and
    # exceed maximum allowed parallelism for the step.
    if max_parallelism is not None:
        if num_running >= max_parallelism:
            raise MaxParallelismReachedException(
                step_index=self._curr.index,
                model_name=self._curr.model_name,
                num_running=num_running,
            )
        num_generator_runs = min(num_generator_runs, max_parallelism - num_running)

    # Make sure not to extend number of trials expected in step.
    if self._curr.enforce_num_trials and self._curr.num_trials > 0:
        num_generator_runs = min(
            num_generator_runs,
            self._curr.num_trials - self.num_can_complete_this_step,
        )

    model = not_none(self.model)
    # TODO[T79183560]: Cloning generator runs here is a temporary measure
    # to ensure a 1-to-1 correspondence between user-facing generator runs
    # and their stored SQL counterparts. This will be no longer needed soon
    # as we move to use foreign keys to avoid storing generator runs on both
    # experiment and generation strategy like we do now.
    generator_run_clones = []
    for _ in range(num_generator_runs):
        try:
            generator_run = model.gen(
                n=n,
                **consolidate_kwargs(
                    kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
                    keywords=get_function_argument_names(model.gen),
                ),
            )
            generator_run._generation_step_index = self._curr.index
            self._generator_runs.append(generator_run)
            generator_run_clones.append(generator_run.clone())
        except DataRequiredError as err:
            # Model needs more data, so we log the error and return
            # as many generator runs as we were able to produce, unless
            # no trials were produced at all (in which case it's safe to raise).
            if not generator_run_clones:
                raise
            logger.debug(f"Model required more data: {err}.")
            # Stop here: without this, the model would be asked again on
            # every remaining iteration even though it already reported
            # that it cannot generate without more data.
            break

    return generator_run_clones
def _gen_multiple(
    self,
    experiment: Experiment,
    num_generator_runs: int,
    data: Optional[Data] = None,
    n: int = 1,
    pending_observations: Optional[Dict[str, List[ObservationFeatures]]] = None,
    **kwargs: Any,
) -> List[GeneratorRun]:
    """Generate several generator runs in one call, each meant to become
    its own trial on the experiment.

    NOTE: Producing the runs together lets the generation strategy respect
    the current step's maximum parallelism and its limit on number of
    trials. If generator runs were requested one by one without first being
    turned into running trials, the strategy could not enforce that it only
    produces as many generator runs as those limits allow.

    Args:
        experiment: Experiment, for which the generation strategy is
            producing new generator runs, and to which they will be added
            as trial(s). Information stored on the experiment (e.g., trial
            statuses) is used to determine which model will be used to
            produce the generator runs returned from this method.
        num_generator_runs: Requested number of generator runs; the actual
            number is capped by the step's remaining parallelism and, when
            enforced, by the number of trials left in the step.
        data: Optional data to be passed to the underlying model's `gen`,
            which is called within this method and actually produces the
            resulting generator runs. By default, data is all data on the
            `experiment` if `use_update` is False and only the new data
            since the last call to this method if `use_update` is True.
        n: Integer representing how many arms should be in each generator
            run produced by this method. NOTE: Some underlying models may
            ignore `n` and produce a model-determined number of arms. In
            that case this method will also output generator runs with
            number of arms that can differ from `n`.
        pending_observations: A map from metric name to pending
            observations for that metric, used by some models to avoid
            resuggesting points that are currently being evaluated.

    Returns:
        The generator runs that were produced, possibly fewer than
        ``num_generator_runs``.

    Raises:
        MaxParallelismReachedException: If the current step's
            `max_parallelism` is already reached.
        DataRequiredError: If the model needs more data before any
            generator run was produced.
    """
    self.experiment = experiment
    self._set_or_update_model(data=data)
    self._save_seen_trial_indices()

    parallelism_cap = self._curr.max_parallelism
    running_count = self.num_running_trials_this_step

    # Cap the request by the parallelism still available in this step.
    if parallelism_cap is not None:
        if running_count >= parallelism_cap:
            raise MaxParallelismReachedException(
                step_index=self._curr.index,
                model_name=self._curr.model_name,
                num_running=running_count,
            )
        slots_left = parallelism_cap - running_count
        num_generator_runs = min(num_generator_runs, slots_left)

    # Cap the request by the number of trials the step still expects.
    if self._curr.enforce_num_trials and self._curr.num_trials > 0:
        trials_left_in_step = (
            self._curr.num_trials - self.num_can_complete_this_step
        )
        num_generator_runs = min(num_generator_runs, trials_left_in_step)

    fitted_model = not_none(self.model)
    produced = []
    for _ in range(num_generator_runs):
        try:
            gen_kwargs = consolidate_kwargs(
                kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
                keywords=get_function_argument_names(fitted_model.gen),
            )
            new_run = fitted_model.gen(
                n=n,
                pending_observations=pending_observations,
                **gen_kwargs,
            )
            new_run._generation_step_index = self._curr.index
            self._generator_runs.append(new_run)
            produced.append(new_run)
        except DataRequiredError as err:
            # With nothing produced yet, surface the error to the caller;
            # otherwise log it and hand back what was generated so far.
            if not produced:
                raise
            logger.debug(f"Model required more data: {err}.")
            break

    return produced