def function_optimize_optuna(trial: Trial):
    train_it = 600
    stack_layers = trial.suggest_int('stack_layers', 1, 3)  # 2
    max_pool_layers = trial.suggest_categorical('max_pool_layers', [True, False])
    if max_pool_layers:
        max_pool_layers = stack_layers
    else:
        max_pool_layers = 0
    st_filter = int(trial.suggest_discrete_uniform('st_filter', 30, 50, 10))  # 40
    inc_filter = int(trial.suggest_discrete_uniform('inc_filter', 30, 50, 10))  # 50
    extra_porc = trial.suggest_int('extra_porc', 1, 4)  # 2
    input_factor = trial.suggest_categorical('input_factor', [0.5, 0.25])  # 0.5
    lr = trial.suggest_categorical('learning_rate', [0.001, 0.0001])
    # loss_string_options = ['cross_entropy', 'mse']
    # loss_string = trial.suggest_categorical('loss_string', loss_string_options)
    loss_string = 'cross_entropy'
    replace_max_pool_with_stride = trial.suggest_categorical('replace_max_pool_with_stride', [True, False])

    exp_params = {
        'tf_config': tf.ConfigProto(allow_soft_placement=True),
        'max_pool_layers': max_pool_layers,
        'stack_layers': stack_layers,
        'input_factor': input_factor,
        'extra_porc': extra_porc,
        'lr': lr,
        'st_filter': st_filter,
        'inc_filter': inc_filter,
        'loss_string': loss_string,
        'replace_max_pool_with_stride': replace_max_pool_with_stride
    }
    print("PARAMS : {0}".format(exp_params))

    out_dict, out_folder = train_run(train_it, save_model=True, interactive_plot=False, **exp_params)

    # save params
    metric = float(out_dict['global_F1'])
    trial.set_user_attr('out_path', out_folder)
    for k in out_dict:
        if k != 'global_F1':
            trial.set_user_attr(k, float(out_dict[k]))

    return metric
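# A minimal sketch of how the objective above would typically be driven,
# assuming the snippet's own dependencies (train_run, tf, the Trial import)
# are defined elsewhere. It shows how the user attributes stored with
# set_user_attr are read back from the finished study.
import optuna

study = optuna.create_study(direction="maximize")  # global_F1 is maximized
study.optimize(function_optimize_optuna, n_trials=50)

best = study.best_trial
print(best.value)                    # best global_F1
print(best.params)                   # suggested hyperparameters
print(best.user_attrs["out_path"])   # output folder recorded by the objective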
def objective(trial: Trial, X: pd.Series, y: pd.Series, workers: int) -> float:
    # Get architecture and its parameters
    pipeline = define_hyperparameters(trial=trial)

    # Evaluate architectures and store settings and metrics
    scores = cross_validate(
        pipeline,
        X,
        y,
        cv=3,
        scoring=["accuracy", "average_precision", "f1"],
        n_jobs=workers,
    )
    trial.set_user_attr(key="model", value=pipeline)
    for metric in ("accuracy", "average_precision", "f1"):
        trial.set_user_attr(key=metric, value=scores[f"test_{metric}"].mean())

    return scores["test_accuracy"].mean()
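# A minimal sketch (assuming define_hyperparameters, X, and y exist) of binding
# the extra arguments with functools.partial and recovering the pipeline that
# was stored as a user attribute on the best trial.
from functools import partial
import optuna

study = optuna.create_study(direction="maximize")
study.optimize(partial(objective, X=X, y=y, workers=4), n_trials=100)

# Note: storing an estimator object as a user attribute works with the default
# in-memory storage; persistent (RDB) storages require JSON-serializable values.
best_pipeline = study.best_trial.user_attrs["model"]
print(study.best_trial.user_attrs["average_precision"])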
def __call__(self, trial: trial_module.Trial) -> float:
    params = self._get_params(trial)  # type: Dict[str, Any]
    dataset = copy.copy(self.dataset)
    callbacks = self._get_callbacks(trial)  # type: List[Callable]
    eval_hist = lgb.cv(
        params,
        dataset,
        callbacks=callbacks,
        early_stopping_rounds=self.early_stopping_rounds,
        feval=self.feval,
        fobj=self.fobj,
        folds=self.cv,
        init_model=self.init_model,
        num_boost_round=self.n_estimators,
    )  # Dict[str, List[float]]
    values = eval_hist["{}-mean".format(self.eval_name)]  # type: List[float]
    best_iteration = len(values)  # type: int

    trial.set_user_attr("best_iteration", best_iteration)

    trial_path = self.model_dir / "trial_{}".format(trial.number)
    trial_path.mkdir(exist_ok=True, parents=True)

    boosters = callbacks[0].boosters_  # type: ignore

    for i, b in enumerate(boosters):
        b.best_iteration = best_iteration
        b.free_dataset()
        booster_path = trial_path / "fold_{}.pkl".format(i)

        with booster_path.open("wb") as f:
            pickle.dump(b, f)

    return values[-1]
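# A hypothetical sketch of reloading the per-fold boosters pickled above for
# the best trial. It assumes the objective was optimized into `study` and that
# `model_dir` is the same pathlib.Path used as self.model_dir; X_test is a
# placeholder for whatever data you want to score.
import pickle

best_trial = study.best_trial
trial_path = model_dir / "trial_{}".format(best_trial.number)

boosters = []
for booster_path in sorted(trial_path.glob("fold_*.pkl")):
    with booster_path.open("rb") as f:
        boosters.append(pickle.load(f))

best_iteration = best_trial.user_attrs["best_iteration"]
# e.g. average fold predictions at the stored best iteration:
# preds = np.mean([b.predict(X_test, num_iteration=best_iteration) for b in boosters], axis=0)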
def _store_scores(self, trial: Trial, scores: Dict[str, OneDimArrayLikeType]) -> None:
    for name, array in scores.items():
        if name in ["test_score", "train_score"]:
            for i, score in enumerate(array):
                trial.set_user_attr("split{}_{}".format(i, name), score)

        trial.set_user_attr("mean_{}".format(name), np.nanmean(array))
        trial.set_user_attr("std_{}".format(name), np.nanstd(array))
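# A minimal, self-contained sketch (independent of the helper above) of the
# same per-split / mean / std pattern, showing where the stored attributes
# surface afterwards: per-trial via trial.user_attrs and as "user_attrs_*"
# columns in study.trials_dataframe(). The fold scores here are synthetic
# stand-ins for cross-validation results.
import numpy as np
import optuna

def objective(trial):
    x = trial.suggest_float("x", -1.0, 1.0)
    fold_scores = np.array([x, x + 0.1, x - 0.1])  # stand-in for CV results
    for i, score in enumerate(fold_scores):
        trial.set_user_attr("split{}_test_score".format(i), float(score))
    trial.set_user_attr("mean_test_score", float(np.nanmean(fold_scores)))
    trial.set_user_attr("std_test_score", float(np.nanstd(fold_scores)))
    return float(np.nanmean(fold_scores))

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=5)
print(study.best_trial.user_attrs["mean_test_score"])
print(study.trials_dataframe()[["user_attrs_mean_test_score", "user_attrs_std_test_score"]])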
def objective(trial: Trial) -> float:
    """Compute objective value

    Parameters
    ----------
    trial : `Trial`
        Current trial

    Returns
    -------
    loss : `float`
        Loss
    """

    # use pyannote.metrics metric when available
    try:
        metric = self.pipeline.get_metric()
    except NotImplementedError:
        metric = None

    losses = []
    processing_time = []
    evaluation_time = []

    # instantiate pipeline with value suggested in current trial
    pipeline = self.pipeline.instantiate(
        self.pipeline.parameters(trial=trial))

    if show_progress is not False:
        progress_bar = tqdm(total=len(inputs), **show_progress)
        progress_bar.update(0)

    # accumulate loss for each input
    for i, input in enumerate(inputs):

        # process input with pipeline
        # (and keep track of processing time)
        before_processing = time.time()
        output = pipeline(input)
        after_processing = time.time()
        processing_time.append(after_processing - before_processing)

        # evaluate output (and keep track of evaluation time)
        before_evaluation = time.time()

        # when metric is not available, use loss method instead
        if metric is None:
            loss = pipeline.loss(input, output)
            losses.append(loss)

        # when metric is available, `input` is expected to be provided
        # by a `pyannote.database` protocol
        else:
            from pyannote.database import get_annotated
            _ = metric(input["annotation"], output, uem=get_annotated(input))

        after_evaluation = time.time()
        evaluation_time.append(after_evaluation - before_evaluation)

        if show_progress is not False:
            progress_bar.update(1)

        if self.pruner is None:
            continue

        trial.report(np.mean(losses) if metric is None else abs(metric), i)
        if trial.should_prune(i):
            raise optuna.structs.TrialPruned()

    if show_progress is not False:
        progress_bar.close()

    trial.set_user_attr("processing_time", sum(processing_time))
    trial.set_user_attr("evaluation_time", sum(evaluation_time))

    return np.mean(losses) if metric is None else abs(metric)
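# A minimal sketch (independent of the pipeline above) of the same
# report-and-prune pattern with the current Optuna API: intermediate values
# are reported per step, unpromising trials are stopped early by the pruner,
# and a summary value is attached with set_user_attr. Note that
# optuna.structs.TrialPruned is a deprecated alias of optuna.TrialPruned in
# recent releases.
import optuna

def objective(trial):
    x = trial.suggest_float("x", 0.0, 10.0)
    loss = x
    for step in range(20):
        loss *= 0.9  # stand-in for a running per-input loss
        trial.report(loss, step)
        if trial.should_prune():
            raise optuna.TrialPruned()
    trial.set_user_attr("final_loss", loss)
    return loss

study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=20)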