def on_epoch(
    self,
    trainer: "GradientDescentTrainer",
    metrics: Dict[str, Any],
    epoch: int,
    is_primary: bool = True,
    **_: Any,
) -> None:
    """Check whether training has reached saturation.

    Args:
        trainer: AllenNLP's trainer.
        metrics: Dictionary of metrics.
        epoch: Number of the current epoch.
        is_primary: A flag for AllenNLP internal use.
    """
    if not is_primary:
        return None

    value = metrics.get(self._monitor)
    if value is None:
        return

    self._trial.report(float(value), epoch)
    if self._trial.should_prune():
        raise TrialPruned()
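# Hedged usage sketch for the callback above: in AllenNLP 2.x one would pass an
# `AllenNLPPruningCallback` (which implements `on_epoch` as shown) to
# `GradientDescentTrainer`. The `model`, `optimizer`, data loaders,
# `serialization_dir`, and the monitored metric name `validation_loss` are
# assumptions here; they are expected to be constructed elsewhere.
from allennlp.training import GradientDescentTrainer
from optuna.integration import AllenNLPPruningCallback

trainer = GradientDescentTrainer(
    model=model,
    optimizer=optimizer,
    data_loader=train_data_loader,
    validation_data_loader=validation_data_loader,
    serialization_dir=serialization_dir,
    callbacks=[AllenNLPPruningCallback(trial, "validation_loss")],
)
trainer.train()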
def objective(trial: Trial) -> float:
    x = trial.suggest_int("x", 5, 5)
    if trial.number == 0:
        return x
    elif trial.number == 1:
        trial.report(1, 4)
        trial.report(2, 7)
        raise TrialPruned()
    elif trial.number == 2:
        trial.report(float("nan"), 3)
        raise TrialPruned()
    elif trial.number == 3:
        raise TrialPruned()
    else:
        raise RuntimeError()
def objective(trial: Trial) -> float:
    x = trial.suggest_int("x", 5, 5)
    z = trial.suggest_categorical("z", [None])
    if trial.number == 0:
        return x * int(z is None)
    elif trial.number == 1:
        trial.report(1, 4)
        trial.report(2, 7)
        raise TrialPruned()
    elif trial.number == 2:
        trial.report(float("nan"), 3)
        raise TrialPruned()
    elif trial.number == 3:
        raise TrialPruned()
    else:
        raise RuntimeError()
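# A minimal driver for either objective above, assuming standard Optuna
# imports. Four trials exercise each branch once: trial 0 completes, and
# trials 1-3 are recorded as pruned because they raise `TrialPruned`.
import optuna
from optuna.trial import TrialState

study = optuna.create_study()
study.optimize(objective, n_trials=4)
assert study.trials[0].state == TrialState.COMPLETE
assert all(t.state == TrialState.PRUNED for t in study.trials[1:])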
def _result_callback(
    _early_stopper: EarlyStopper,
    result: Union[float, int],
    epoch: int,
) -> None:
    trial.report(result, step=epoch)
    if trial.should_prune():
        # Record the pruning in the result tracker.
        result_tracker.log_metrics(metrics=dict(pruned=1), step=epoch)
        # The run itself was successful, but it has to be ended here.
        result_tracker.end_run(success=True)
        logger.info(f"Pruned trial: {trial} at epoch {epoch} due to {metric}={result}")
        raise TrialPruned()
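# A hedged illustration of how an early stopper would drive the callback
# above: once per evaluation epoch it passes the current metric value, and the
# callback reports it to Optuna, possibly ending the trial. The loop and the
# names `max_epochs`, `evaluate`, `model`, and `stopper` are hypothetical
# stand-ins for illustration, not the library's actual code.
for epoch in range(1, max_epochs + 1):
    result = evaluate(model)  # hypothetical per-epoch validation metric
    _result_callback(stopper, result, epoch)  # may raise TrialPruned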
def _cross_validate_with_pruning(
    self,
    trial,  # type: trial_module.Trial
    estimator,  # type: BaseEstimator
):
    # type: (...) -> Dict[str, OneDimArrayLikeType]

    if is_classifier(estimator):
        partial_fit_params = self.fit_params.copy()
        classes = np.unique(self.y)
        partial_fit_params.setdefault("classes", classes)
    else:
        partial_fit_params = self.fit_params

    n_splits = self.cv.get_n_splits(self.X, self.y, groups=self.groups)
    estimators = [clone(estimator) for _ in range(n_splits)]
    scores = {
        "fit_time": np.zeros(n_splits),
        "score_time": np.zeros(n_splits),
        "test_score": np.empty(n_splits),
    }

    if self.return_train_score:
        scores["train_score"] = np.empty(n_splits)

    for step in range(self.max_iter):
        for i, (train, test) in enumerate(self.cv.split(self.X, self.y, groups=self.groups)):
            out = self._partial_fit_and_score(estimators[i], train, test, partial_fit_params)

            if self.return_train_score:
                scores["train_score"][i] = out.pop(0)

            scores["test_score"][i] = out[0]
            scores["fit_time"][i] += out[1]
            scores["score_time"][i] += out[2]

        intermediate_value = np.nanmean(scores["test_score"])
        trial.report(intermediate_value, step=step)

        if trial.should_prune():
            self._store_scores(trial, scores)

            raise TrialPruned("trial was pruned at iteration {}.".format(step))

    return scores
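# Hedged usage sketch: `_cross_validate_with_pruning` is internal to
# `optuna.integration.OptunaSearchCV`. With `enable_pruning=True` and an
# estimator that supports `partial_fit`, each of the `max_iter` steps reports
# the mean test score to the pruner as shown above. The dataset and the
# parameter distribution below are assumptions for illustration.
import optuna
from sklearn.datasets import load_iris
from sklearn.linear_model import SGDClassifier

X, y = load_iris(return_X_y=True)
param_distributions = {
    "alpha": optuna.distributions.LogUniformDistribution(1e-5, 1e-1),
}
search = optuna.integration.OptunaSearchCV(
    SGDClassifier(),
    param_distributions,
    n_trials=20,
    max_iter=10,  # partial_fit steps per trial, each one a pruning checkpoint
    enable_pruning=True,
)
search.fit(X, y)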
def run(self) -> float:
    """Train a model using AllenNLP."""
    for package_name in self._include_package:
        allennlp.common.util.import_module_and_submodules(package_name)

    # Without the following lines, the transformer model construction only takes place in the
    # first trial (which would consume some random numbers), and the cached model would be used
    # in subsequent trials (which would not consume random numbers), leading to inconsistent
    # results between single-trial and multi-trial runs. To make results reproducible across
    # multiple trials, we clear the cache before each trial.
    # TODO(MagiaSN) When AllenNLP has introduced a better API to do this, one should remove
    # these lines and use the new API instead. For example, use the `_clear_caches()` method
    # which will be in the next AllenNLP release after 2.4.0.
    allennlp.common.cached_transformers._model_cache.clear()
    allennlp.common.cached_transformers._tokenizer_cache.clear()

    self._set_environment_variables()
    params = allennlp.common.params.Params(self._build_params())

    if "distributed" in params:
        if OPTUNA_ALLENNLP_DISTRIBUTED_FLAG in os.environ:
            warnings.warn(
                "Another process may already exist."
                " If you have trouble, please unset the environment"
                " variable `OPTUNA_ALLENNLP_USE_DISTRIBUTED`"
                " and try again."
            )

        os.environ[OPTUNA_ALLENNLP_DISTRIBUTED_FLAG] = "1"

    try:
        allennlp.commands.train.train_model(
            params=params,
            serialization_dir=self._serialization_dir,
            file_friendly_logging=self._file_friendly_logging,
            force=self._force,
            include_package=self._include_package,
        )
    except ProcessRaisedException as e:
        if "raise TrialPruned()" in str(e):
            raise TrialPruned()

    metrics = json.load(open(os.path.join(self._serialization_dir, "metrics.json")))
    return metrics[self._metrics]
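# Hedged usage sketch: `run` is the entry point of
# `optuna.integration.AllenNLPExecutor`. The Jsonnet config path, the
# serialization directory layout, and the metric key below are assumptions;
# suggested hyperparameters are exposed to the config through environment
# variables (read with `std.extVar` in the Jsonnet file).
import optuna

def allennlp_objective(trial: optuna.Trial) -> float:
    trial.suggest_float("dropout", 0.0, 0.5)
    executor = optuna.integration.AllenNLPExecutor(
        trial=trial,
        config_file="config.jsonnet",
        serialization_dir=f"result/{trial.number}",
        metrics="best_validation_accuracy",
    )
    return executor.run()

study = optuna.create_study(direction="maximize")
study.optimize(allennlp_objective, n_trials=20)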
def test_intersection_search_space() -> None:
    search_space = IntersectionSearchSpace()
    study = create_study()

    # No trial.
    assert search_space.calculate(study) == {}
    assert search_space.calculate(study) == intersection_search_space(study)

    # First trial.
    study.optimize(lambda t: t.suggest_float("y", -3, 3) + t.suggest_int("x", 0, 10), n_trials=1)
    assert search_space.calculate(study) == {
        "x": IntUniformDistribution(low=0, high=10),
        "y": UniformDistribution(low=-3, high=3),
    }
    assert search_space.calculate(study) == intersection_search_space(study)

    # Returning a sorted `OrderedDict` instead of a `dict`.
    assert search_space.calculate(study, ordered_dict=True) == OrderedDict(
        [
            ("x", IntUniformDistribution(low=0, high=10)),
            ("y", UniformDistribution(low=-3, high=3)),
        ]
    )
    assert search_space.calculate(study, ordered_dict=True) == intersection_search_space(
        study, ordered_dict=True
    )

    # Second trial (only the "y" parameter is suggested in this trial).
    study.optimize(lambda t: t.suggest_float("y", -3, 3), n_trials=1)
    assert search_space.calculate(study) == {"y": UniformDistribution(low=-3, high=3)}
    assert search_space.calculate(study) == intersection_search_space(study)

    # Failed or pruned trials are not considered in the calculation of
    # an intersection search space.
    def objective(trial: Trial, exception: Exception) -> float:
        trial.suggest_float("z", 0, 1)
        raise exception

    study.optimize(lambda t: objective(t, RuntimeError()), n_trials=1, catch=(RuntimeError,))
    study.optimize(lambda t: objective(t, TrialPruned()), n_trials=1)
    assert search_space.calculate(study) == {"y": UniformDistribution(low=-3, high=3)}
    assert search_space.calculate(study) == intersection_search_space(study)

    # If two parameters have the same name but different distributions,
    # they are regarded as different parameters.
    study.optimize(lambda t: t.suggest_float("y", -1, 1), n_trials=1)
    assert search_space.calculate(study) == {}
    assert search_space.calculate(study) == intersection_search_space(study)

    # The search space remains empty once it becomes empty.
    study.optimize(lambda t: t.suggest_float("y", -3, 3) + t.suggest_int("x", 0, 10), n_trials=1)
    assert search_space.calculate(study) == {}
    assert search_space.calculate(study) == intersection_search_space(study)
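# A standalone sketch of the behavior the test above exercises, assuming
# Optuna 2.x where `intersection_search_space` lives in `optuna.samplers`:
# only parameters suggested with the same distribution in *every* completed
# trial survive the intersection.
import optuna
from optuna.samplers import intersection_search_space

study = optuna.create_study()
study.optimize(lambda t: t.suggest_float("y", -3, 3) + t.suggest_int("x", 0, 10), n_trials=1)
study.optimize(lambda t: t.suggest_float("y", -3, 3), n_trials=1)
print(intersection_search_space(study))  # only "y" remains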
def test_group_decomposed_search_space() -> None:
    search_space = _GroupDecomposedSearchSpace()
    study = create_study()

    # No trial.
    assert search_space.calculate(study).search_spaces == []

    # A single parameter.
    study.optimize(lambda t: t.suggest_int("x", 0, 10), n_trials=1)
    assert search_space.calculate(study).search_spaces == [
        {"x": IntUniformDistribution(low=0, high=10)}
    ]

    # Disjoint parameters.
    study.optimize(lambda t: t.suggest_int("y", 0, 10) + t.suggest_float("z", -3, 3), n_trials=1)
    assert search_space.calculate(study).search_spaces == [
        {"x": IntUniformDistribution(low=0, high=10)},
        {
            "y": IntUniformDistribution(low=0, high=10),
            "z": UniformDistribution(low=-3, high=3),
        },
    ]

    # Parameters which include one of the search spaces in the group.
    study.optimize(
        lambda t: t.suggest_int("y", 0, 10)
        + t.suggest_float("z", -3, 3)
        + t.suggest_float("u", 1e-2, 1e2, log=True)
        + bool(t.suggest_categorical("v", ["A", "B", "C"])),
        n_trials=1,
    )
    assert search_space.calculate(study).search_spaces == [
        {"x": IntUniformDistribution(low=0, high=10)},
        {
            "y": IntUniformDistribution(low=0, high=10),
            "z": UniformDistribution(low=-3, high=3),
        },
        {
            "u": LogUniformDistribution(low=1e-2, high=1e2),
            "v": CategoricalDistribution(choices=["A", "B", "C"]),
        },
    ]

    # A parameter which is included in one of the search spaces in the group.
    study.optimize(lambda t: t.suggest_float("u", 1e-2, 1e2, log=True), n_trials=1)
    assert search_space.calculate(study).search_spaces == [
        {"x": IntUniformDistribution(low=0, high=10)},
        {
            "y": IntUniformDistribution(low=0, high=10),
            "z": UniformDistribution(low=-3, high=3),
        },
        {"u": LogUniformDistribution(low=1e-2, high=1e2)},
        {"v": CategoricalDistribution(choices=["A", "B", "C"])},
    ]

    # Parameters whose intersection with one of the search spaces in the group is not empty.
    study.optimize(
        lambda t: t.suggest_int("y", 0, 10) + t.suggest_int("w", 2, 8, log=True), n_trials=1
    )
    assert search_space.calculate(study).search_spaces == [
        {"v": CategoricalDistribution(choices=["A", "B", "C"])},
        {"x": IntUniformDistribution(low=0, high=10)},
        {"u": LogUniformDistribution(low=1e-2, high=1e2)},
        {"y": IntUniformDistribution(low=0, high=10)},
        {"z": UniformDistribution(low=-3, high=3)},
        {"w": IntLogUniformDistribution(low=2, high=8)},
    ]

    search_space = _GroupDecomposedSearchSpace()
    study = create_study()

    # Failed or pruned trials are not considered in the calculation of
    # an intersection search space.
    def objective(trial: Trial, exception: Exception) -> float:
        trial.suggest_float("a", 0, 1)
        raise exception

    study.optimize(lambda t: objective(t, RuntimeError()), n_trials=1, catch=(RuntimeError,))
    study.optimize(lambda t: objective(t, TrialPruned()), n_trials=1)
    assert search_space.calculate(study).search_spaces == []

    # If two parameters have the same name but different distributions,
    # the first one takes priority.
    study.optimize(lambda t: t.suggest_float("a", -1, 1), n_trials=1)
    study.optimize(lambda t: t.suggest_float("a", 0, 1), n_trials=1)
    assert search_space.calculate(study).search_spaces == [
        {"a": UniformDistribution(low=-1, high=1)}
    ]
def pruned_objective(trial: Trial) -> float:
    raise TrialPruned()
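# A minimal sketch showing what the helper above is for: an objective that
# raises `TrialPruned` yields a trial in the PRUNED state rather than FAIL.
import optuna
from optuna.trial import TrialState

study = optuna.create_study()
study.optimize(pruned_objective, n_trials=1)
assert study.trials[0].state == TrialState.PRUNED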