def _trial_to_params(trial: Trial):
    """Sample one hyper-parameter dict from ``trial`` on top of ``DEFAULT_PARAMS``.

    The result starts as a shallow copy of ``DEFAULT_PARAMS``; every sampled
    key overrides a default of the same name.  Tree-related keys are added
    only when the booster is ``gbtree`` or ``dart``; the drop-out keys only
    when it is ``dart``.

    Args:
        trial: Object providing the ``suggest_*`` sampling methods.

    Returns:
        The assembled parameter dict.
    """
    params = {**DEFAULT_PARAMS}

    # 'gblinear' and 'dart' boosters are too slow, so only 'gbtree' is offered.
    params["booster"] = trial.suggest_categorical("booster", ['gbtree'])
    params["seed"] = trial.suggest_int('seed', 0, 999999)
    params["learning_rate"] = trial.suggest_loguniform(
        'learning_rate', 0.005, 0.5)
    params["lambda"] = trial.suggest_loguniform("lambda", 1e-8, 1.0)
    params["alpha"] = trial.suggest_loguniform("alpha", 1e-8, 1.0)

    booster = params['booster']

    if booster in ('gbtree', 'dart'):
        sampling_method = trial.suggest_categorical(
            "sampling_method", ["uniform", "gradient_based"])
        # The lower bound of the subsample range depends on the sampling method.
        subsample_low = .5 if sampling_method == 'uniform' else .1
        subsample = trial.suggest_discrete_uniform(
            'subsample', subsample_low, 1, .05)

        params["max_depth"] = trial.suggest_int('max_depth', 2, 25)
        params["sampling_method"] = sampling_method
        params["subsample"] = subsample
        params["colsample_bytree"] = trial.suggest_discrete_uniform(
            'colsample_bytree', .20, 1., .01)
        params["colsample_bylevel"] = trial.suggest_discrete_uniform(
            'colsample_bylevel', .20, 1., .01)
        params["colsample_bynode"] = trial.suggest_discrete_uniform(
            'colsample_bynode', .20, 1., .01)
        # Repeated entries in the categorical lists below make those values
        # appear several times among the choices.
        params["gamma"] = trial.suggest_categorical(
            "gamma",
            [0, 0, 0, 0, 0, 0.01, 0.1, 0.2, 0.3, 0.5, 1., 10., 100.])
        params["min_child_weight"] = trial.suggest_categorical(
            'min_child_weight',
            [1, 1, 1, 1, 2, 3, 4, 5, 1, 6, 7, 8, 9, 10,
             11, 15, 30, 60, 100, 1, 1, 1])
        params["max_delta_step"] = trial.suggest_categorical(
            "max_delta_step", [0, 0, 0, 0, 0, 1, 2, 5, 8])
        params["grow_policy"] = trial.suggest_categorical(
            "grow_policy", ["depthwise", "lossguide"])
        params["tree_method"] = "gpu_hist"
        params["gpu_id"] = 0

    if booster == "dart":
        params["sample_type"] = trial.suggest_categorical(
            "sample_type", ["uniform", "weighted"])
        params["normalize_type"] = trial.suggest_categorical(
            "normalize_type", ["tree", "forest"])
        params["rate_drop"] = trial.suggest_loguniform("rate_drop", 1e-8, 1.0)
        params["skip_drop"] = trial.suggest_loguniform("skip_drop", 1e-8, 1.0)

    return params
def objective(trial: Trial, train_X, train_y, test_X, test_y) -> float:
    """Train an ``XGBClassifier`` with sampled parameters and score it on the hold-out set.

    Args:
        trial: Object providing the ``suggest_*`` sampling methods.
        train_X: Training features passed to ``fit``.
        train_y: Training labels passed to ``fit``.
        test_X: Hold-out features used for scoring.
        test_y: Hold-out labels used for scoring.

    Returns:
        The estimator's ``score`` on ``(test_X, test_y)``.
    """
    params = {
        "n_estimators": trial.suggest_int('n_estimators', 0, 1000),
        'max_depth': trial.suggest_int('max_depth', 2, 25),
        'reg_alpha': trial.suggest_int('reg_alpha', 0, 10),
        'reg_lambda': trial.suggest_int('reg_lambda', 0, 10),
        'min_child_weight': trial.suggest_int('min_child_weight', 0, 20),
        'gamma': trial.suggest_int('gamma', 0, 5),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.0001, 0.5),
        'colsample_bytree': trial.suggest_discrete_uniform(
            'colsample_bytree', 0.1, 1, 0.01),
        'nthread': -1,
        'scale_pos_weight': trial.suggest_int('scale_pos_weight', 1, 10),
        'random_state': trial.suggest_int('random_state', 1, 30),
        'subsample': trial.suggest_float('subsample', 0.5, 0.9),
    }

    model = XGBClassifier(**params)
    model.fit(train_X, train_y)

    # Score the model that was actually trained.  The previous code passed the
    # fitted model to ``cross_val_score`` on the test split, which clones the
    # estimator and refits it on test folds -- the fit on the training data
    # was silently discarded and never influenced the returned value.
    return float(model.score(test_X, test_y))
def objective(trial: Trial, X_train, X_test, y_train, y_test) -> float:
    """Train a multi-output XGBoost regressor and return its hold-out MAE.

    Note the argument order: both feature sets come first, then both targets.

    Args:
        trial: Object providing the ``suggest_*`` sampling methods.
        X_train: Training features passed to ``fit``.
        X_test: Hold-out features used for evaluation.
        y_train: Training targets passed to ``fit``.
        y_test: Hold-out targets used for evaluation.

    Returns:
        Mean absolute error over all hold-out samples and outputs
        (lower is better).
    """
    params = {
        "booster": "gbtree",
        "n_estimators": trial.suggest_int("n_estimators", 0, 1000),
        "max_depth": trial.suggest_int("max_depth", 2, 10),
        "reg_alpha": trial.suggest_int("reg_alpha", 0, 5),
        "reg_lambda": trial.suggest_int("reg_lambda", 0, 5),
        "min_child_weight": trial.suggest_int("min_child_weight", 0, 5),
        "gamma": trial.suggest_int("gamma", 0, 5),
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.005, 0.5),
        "colsample_bytree": trial.suggest_discrete_uniform(
            "colsample_bytree", 0.1, 1, 0.01
        ),
        "nthread": -1,
        "use_label_encoder": False,
        "eval_metric": "logloss",
    }

    model = MultiOutputRegressor(XGBRegressor(**params))
    model.fit(X_train, y_train)

    # Evaluate the model that was actually trained.  The previous code handed
    # the fitted model to ``cross_val_score`` on the test split, which clones
    # and refits the estimator on test folds, so the training fit above never
    # affected the returned score.
    abs_errors = np.abs(np.asarray(y_test) - model.predict(X_test))
    return float(np.mean(abs_errors))
def objective(trial: Trial, data) -> float:
    """Sample booster parameters and return the walk-forward validation error.

    Args:
        trial: Object providing the ``suggest_*`` sampling methods.
        data: Forwarded unchanged to ``walk_forward_validation``.

    Returns:
        The first element of the tuple returned by
        ``walk_forward_validation(params, data, 20)``.
    """
    # Fixed settings first; GPU training ("tree_method": "gpu_hist") is
    # currently left disabled.
    params = {"booster": "gbtree"}

    params["n_estimators"] = trial.suggest_int("n_estimators", 0, 1000)
    params["max_depth"] = trial.suggest_int("max_depth", 2, 10)
    params["reg_alpha"] = trial.suggest_int("reg_alpha", 0, 5)
    params["reg_lambda"] = trial.suggest_int("reg_lambda", 0, 5)
    params["min_child_weight"] = trial.suggest_int("min_child_weight", 0, 5)
    params["gamma"] = trial.suggest_int("gamma", 0, 5)
    params["learning_rate"] = trial.suggest_loguniform(
        "learning_rate", 0.005, 0.5)
    params["colsample_bytree"] = trial.suggest_discrete_uniform(
        "colsample_bytree", 0.1, 1, 0.01)

    params["nthread"] = -1
    params["use_label_encoder"] = False
    params["eval_metric"] = "logloss"

    # Only the error is needed; the other two returned values are ignored.
    mae, _, _ = walk_forward_validation(params, data, 20)
    return mae
def _suggest(self, trial: optuna.Trial, v: problem.Var) -> float:
    """Return a value for variable ``v``, sampling it from ``trial`` if needed.

    If ``trial.params`` already holds a value under ``v.name`` it is reused:
    a string value is translated to its index in ``v.range.choices``, any
    other value is returned as is.  Otherwise the value is sampled according
    to the type of ``v.range`` and ``v.distribution``.  Categorical values
    are always reported as the index of the chosen category.

    Raises:
        ValueError: If no sampling rule matches ``v``.
    """
    name = v.name
    rng = v.range
    known = trial.params

    if name in known:
        stored = known[name]
        if not isinstance(stored, str):
            return stored
        # A stored string can only come from a categorical variable.
        assert isinstance(rng, problem.CategoricalRange)
        return rng.choices.index(stored)

    if isinstance(rng, problem.ContinuousRange):
        if v.distribution == problem.Distribution.UNIFORM:
            return trial.suggest_uniform(name, rng.low, rng.high)
        if v.distribution == problem.Distribution.LOG_UNIFORM:
            return trial.suggest_loguniform(name, rng.low, rng.high)
        # Any other distribution falls through to the error below.
    elif isinstance(rng, problem.DiscreteRange):
        # The sampling calls below are given ``high - 1`` as the upper bound.
        upper = rng.high - 1
        if self._use_discrete_uniform:
            return trial.suggest_discrete_uniform(name, rng.low, upper, q=1)
        if v.distribution == problem.Distribution.LOG_UNIFORM:
            return trial.suggest_int(name, rng.low, upper, log=True)
        return trial.suggest_int(name, rng.low, upper)
    elif isinstance(rng, problem.CategoricalRange):
        picked = trial.suggest_categorical(name, rng.choices)
        return rng.choices.index(picked)

    raise ValueError("Unsupported parameter: {}".format(v))
def cnn_pipeline_factory(report_dir: Path, trial: Trial) -> ArmorDigitPipeline:
    """Build a custom-CNN pipeline whose tunable settings are sampled from ``trial``.

    Sampled settings: ``dropout`` (uniform in [0, 0.99]), ``lr`` (log-uniform
    in [1e-5, 1e-1]) and the dense layer size, drawn as a power of two with
    an exponent between 3 and 10.

    Args:
        report_dir: Directory passed (as a string) as ``logs_dir``.
        trial: Object providing the ``suggest_*`` sampling methods.

    Returns:
        The result of ``ArmorDigitKerasPipeline.from_custom_cnn``.
    """
    dropout = trial.suggest_uniform("dropout", 0, 0.99)
    learning_rate = trial.suggest_loguniform("lr", 1e-5, 1e-1)
    # The exponent is sampled as a float in steps of 1, hence the rounding.
    dense_exponent = round(
        trial.suggest_discrete_uniform("dense_size_log2", 3, 10, 1))

    return ArmorDigitKerasPipeline.from_custom_cnn(
        input_size=32,
        conv_blocks=((32, 32), (64, 64)),
        logs_dir=str(report_dir),
        dropout=dropout,
        lr=learning_rate,
        dense_size=2**dense_exponent,
    )
def trial_to_params(trial: optuna.Trial):
    """Sample one parameter dict from ``trial`` on top of ``DEFAULT_PARAMS``.

    ``l1_ratio`` is sampled only for the ``elasticnet`` penalty, ``eta0``
    only for the ``constant`` and ``invscaling`` learning-rate schedules, and
    ``power_t`` only for ``invscaling``.

    Args:
        trial: Object providing the ``suggest_*`` sampling methods.

    Returns:
        A copy of ``DEFAULT_PARAMS`` overridden with the sampled values.
    """
    # These two choices decide which conditional parameters are sampled below.
    penalty = trial.suggest_categorical(
        'penalty', ['l1', 'l2', 'elasticnet'])
    learning_rate = trial.suggest_categorical(
        'learning_rate', ['constant', 'optimal', 'invscaling'])

    params = {**DEFAULT_PARAMS}
    params['loss'] = trial.suggest_categorical(
        'loss',
        ['squared_loss', 'huber', 'epsilon_insensitive',
         'squared_epsilon_insensitive'])
    params['penalty'] = penalty
    params['alpha'] = trial.suggest_loguniform('alpha', 1e-7, 1.)
    params['random_state'] = trial.suggest_int('random_state', 0, 999999)
    params['learning_rate'] = learning_rate

    if penalty == 'elasticnet':
        params['l1_ratio'] = trial.suggest_discrete_uniform(
            'l1_ratio', .01, .99, .01)

    if learning_rate in ('constant', 'invscaling'):
        params['eta0'] = trial.suggest_loguniform('eta0', 1e-7, 1e-1)

    if learning_rate == 'invscaling':
        params['power_t'] = trial.suggest_discrete_uniform(
            'power_t', 0.1, 0.5, 0.001)

    return params
def objective(trial: Trial) -> float:
    """Fit a logistic-growth Prophet model and return its validation RMSE.

    Uses ``train``, ``valid``, ``cap`` and ``floor`` from the enclosing
    scope.  Side effect: the ``cap`` and ``floor`` columns of ``train`` are
    (re)assigned on every call.

    Args:
        trial: Object providing the ``suggest_*`` sampling methods.

    Returns:
        Root mean squared error between ``valid.y`` and the last 163
        forecast rows.
    """
    horizon = 163  # number of hourly periods that are forecast and scored

    sampled = {
        "changepoint_range": trial.suggest_discrete_uniform(
            "changepoint_range", 0.8, 0.95, 0.001),
        "n_changepoints": trial.suggest_int("n_changepoints", 20, 35),
        "changepoint_prior_scale": trial.suggest_discrete_uniform(
            "changepoint_prior_scale", 0.001, 0.5, 0.001),
        "seasonality_prior_scale": trial.suggest_discrete_uniform(
            "seasonality_prior_scale", 1, 25, 0.5),
    }
    fixed = {
        "growth": "logistic",
        "seasonality_mode": "additive",
        "yearly_seasonality": False,
        "weekly_seasonality": True,
        "daily_seasonality": True,
    }

    # fit the model; cap/floor columns are set on both the history and the
    # future frame
    m = Prophet(**sampled, **fixed)
    train["cap"] = cap
    train["floor"] = floor
    m.fit(train)

    future = m.make_future_dataframe(periods=horizon, freq="H")
    future["cap"] = cap
    future["floor"] = floor

    predicted = m.predict(future).tail(horizon)
    return mean_squared_error(valid.y, predicted.yhat, squared=False)
def _construct_trial_grid(trial: optuna.Trial, param_space: Dict): param_grid = {} for name, params in param_space.items(): param_type = params[0] if param_type == "categorical": choices = params[1] param_grid[name] = trial.suggest_categorical(name, choices) elif param_type == "discrete_uniform": low, high, q = params[1], params[2], params[3] param_grid[name] = trial.suggest_discrete_uniform( name, low, high, q) elif param_type == "loguniform": low, high = params[1], params[2] param_grid[name] = trial.suggest_loguniform(name, low, high) elif param_type == "uniform": low, high = params[1], params[2] param_grid[name] = trial.suggest_uniform(name, low, high) elif param_type == "float": low, high = params[1], params[2] step, log = None, False if len(params) > 3: step = params[3] if len(params) > 4: log = params[4] param_grid[name] = trial.suggest_float(name, low, high, step=step, log=log) elif param_type == "int": low, high = params[1], params[2] step, log = 1, False if len(params) > 3: step = params[3] if len(params) > 4: log = params[4] param_grid[name] = trial.suggest_int(name, low, high, step=step, log=log) else: raise ValueError( f"Undefined sampling method given for trial object: {name}: {params}" ) return param_grid
def objective(trial: Trial):
    """Sample ``clean_token_count_limit``, record run metadata, return its square.

    A ``'run'`` user attribute holding a dict of run details is stored on the
    trial before the result is computed.

    Args:
        trial: Object providing ``suggest_discrete_uniform`` and
            ``set_user_attr``.

    Returns:
        ``float(limit) ** 2`` when the sampled limit is at least 10000,
        otherwise ``None``.
    """
    # The value is sampled in steps of 1 and then truncated to an int.
    sampled = trial.suggest_discrete_uniform(
        'clean_token_count_limit', 20, 60000, 1)
    clean_token_count_limit = int(sampled)

    run_details = {
        'loss': 1.2,
        # -- store other results like this
        'os_uname': os.uname(),
        'clean_token_count_limit': clean_token_count_limit,
        'attachments': {
            'info': 'info',
            'output': 'output'
        }
    }
    trial.set_user_attr('run', run_details)

    if clean_token_count_limit >= 10000:
        return float(clean_token_count_limit)**2
    return None
def add_suggest(trial: optuna.Trial, user_attrs=None):
    """
    Add hyperparam ranges to an optuna trial and typical user attrs.

    The default user attributes are set first and then ``user_attrs``, so
    entries in ``user_attrs`` override defaults of the same name.

    Args:
        trial: Trial on which the parameters are suggested and the user
            attributes are set.
        user_attrs: Optional mapping of extra or overriding user attributes.
            ``None`` (the default) means no overrides.

    Returns:
        The same ``trial`` object that was passed in.

    Usage:
        trial = optuna.trial.FixedTrial(
            params={
                'hidden_size': 128,
            }
        )
        trial = add_suggest(trial)
        trainer = pl.Trainer()
        model = LSTM_PL(dict(**trial.params, **trial.user_attrs), dataset_train,
                        dataset_test, cache_base_path, norm)
        trainer.fit(model)
    """
    # A ``None`` default avoids sharing one mutable dict between all calls.
    if user_attrs is None:
        user_attrs = {}

    trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
    trial.suggest_uniform("attention_dropout", 0, 0.75)
    # we must have nhead <= hidden_size
    # so nhead_power.max() <= hidden_size_power.min()
    trial.suggest_discrete_uniform("hidden_size_power", 4, 10, 1)
    trial.suggest_discrete_uniform("hidden_out_size_power", 4, 9, 1)
    trial.suggest_discrete_uniform("nhead_power", 1, 4, 1)
    trial.suggest_int("nlayers", 1, 12)
    trial.suggest_categorical("use_lstm", [False, True])
    trial.suggest_categorical("agg", ['last', 'max', 'mean', 'all'])

    user_attrs_default = {
        "batch_size": 16,
        "grad_clip": 40,
        "max_nb_epochs": 200,
        "num_workers": 4,
        "num_extra_target": 24 * 4,
        "vis_i": "670",
        "num_context": 24 * 4,
        "input_size": 18,
        "input_size_decoder": 17,
        "context_in_target": False,
        "output_size": 1,
        "patience": 3,
        'min_std': 0.005,
    }

    # Plain loops: these calls are made purely for their side effects.
    for key, value in user_attrs_default.items():
        trial.set_user_attr(key, value)
    for key, value in user_attrs.items():
        trial.set_user_attr(key, value)

    return trial
def objective(trial: Trial) -> float:
    """Fit a NeuralProphet model with sampled settings and return its RMSE.

    Uses ``train`` and ``valid`` from the enclosing scope.

    Args:
        trial: Object providing the ``suggest_*`` sampling methods.

    Returns:
        Root mean squared error computed from the ``yhat1`` column of the
        forecast rows whose ``y`` is missing and ``valid``.
    """
    epochs = trial.suggest_categorical(
        "epochs", [50, 100, 200, 300, 400, 500])
    num_hidden_layers = trial.suggest_int("num_hidden_layers", 0, 5)
    learning_rate = trial.suggest_float("learning_rate", 1e-3, 0.1)
    changepoints_range = trial.suggest_discrete_uniform(
        "changepoints_range", 0.8, 0.95, 0.001)
    n_changepoints = trial.suggest_int("n_changepoints", 20, 35)

    # fit the model
    m = NeuralProphet(
        epochs=epochs,
        batch_size=64,
        num_hidden_layers=num_hidden_layers,
        learning_rate=learning_rate,
        changepoints_range=changepoints_range,
        n_changepoints=n_changepoints,
        seasonality_mode="additive",
        yearly_seasonality=False,
        weekly_seasonality=True,
        daily_seasonality=True,
        loss_func="MSE",
    )
    m.fit(train, freq="1D")

    future = m.make_future_dataframe(
        train, periods=len(valid), n_historic_predictions=True)
    forecast = m.predict(future)

    # Rows with a missing ``y`` are the ones that get scored.
    unseen = forecast[forecast.y.isna()]
    return mean_squared_error(unseen.yhat1, valid, squared=False)
def HDL_define_by_run(trial: Trial, df: pd.DataFrame, sub_HDL: dict, name):
    """Sample a choice and its hyper-parameters, narrowing ``df`` to matching rows.

    A key of ``sub_HDL`` is sampled first and ``df`` is filtered to the rows
    whose ``name`` column equals it.  Unless the choice is the string
    ``"None"``, every hyper-parameter defined under that choice is sampled
    too and ``df`` is filtered further on the column
    ``"{name}.{choice}.{hp_name}"``.

    Args:
        trial: Object providing ``suggest_categorical`` and
            ``suggest_discrete_uniform``.
        df: Frame to filter; it must contain the columns named above.
        sub_HDL: Mapping of choice to a mapping of hyper-parameter name to a
            dict with ``"_type"`` and ``"_value"`` entries.
        name: Column in ``df`` that holds the choice.

    Returns:
        The filtered frame.

    Raises:
        NotImplementedError: If a ``"_type"`` is not one of ``ordinal``,
            ``choice``, ``int_quniform`` or ``quniform``.
    """
    choice = trial.suggest_categorical(name, list(sub_HDL.keys()))
    df = df.loc[df[name] == choice, :]
    if choice == "None":
        return df

    for hp_name, hp_define in sub_HDL[choice].items():
        kind = hp_define["_type"]
        candidates = hp_define["_value"]
        column = f"{name}.{choice}.{hp_name}"

        if kind in ("ordinal", "choice"):
            picked = trial.suggest_categorical(column, candidates)
        elif kind in ("int_quniform", "quniform"):
            picked = trial.suggest_discrete_uniform(column, *candidates)
        else:
            raise NotImplementedError

        if isinstance(picked, float):
            # Floats are matched with a tolerance instead of exact equality.
            keep = np.abs(df[column] - picked) < 1e-8
        else:
            keep = df[column] == picked
        df = df.loc[keep, :]

    return df
def objective(trial: Trial) -> float:
    """Fit a Prophet model with custom seasonalities and return its RMSE.

    Uses ``train``, ``valid``, ``cap`` and ``floor`` from the enclosing
    scope.  Side effect: the ``cap`` and ``floor`` columns of ``train`` are
    (re)assigned on every call.

    Args:
        trial: Object providing the ``suggest_*`` sampling methods.

    Returns:
        Root mean squared error between ``valid.y`` and the last 7 forecast
        rows.
    """
    params = {
        "changepoint_range": trial.suggest_discrete_uniform(
            "changepoint_range", 0.8, 0.95, 0.001
        ),
        "n_changepoints": trial.suggest_int("n_changepoints", 20, 35),
        "changepoint_prior_scale": trial.suggest_discrete_uniform(
            "changepoint_prior_scale", 0.001, 0.5, 0.001
        ),
        "seasonality_prior_scale": trial.suggest_discrete_uniform(
            "seasonality_prior_scale", 1, 25, 0.5
        ),
        "yearly_fourier": trial.suggest_int("yearly_fourier", 5, 15),
        "monthly_fourier": trial.suggest_int("monthly_fourier", 3, 12),
        "weekly_fourier": trial.suggest_int("weekly_fourier", 3, 7),
        "quaterly_fourier": trial.suggest_int("quaterly_fourier", 3, 10),
        "yearly_prior": trial.suggest_discrete_uniform("yearly_prior", 1, 25, 0.5),
        "monthly_prior": trial.suggest_discrete_uniform("monthly_prior", 1, 25, 0.5),
        "weekly_prior": trial.suggest_discrete_uniform("weekly_prior", 1, 25, 0.5),
        "quaterly_prior": trial.suggest_discrete_uniform("quaterly_prior", 1, 25, 0.5),
        "growth": "logistic",
        "seasonality_mode": "additive",
        "weekly_seasonality": True,
        "daily_seasonality": True,
    }

    # fit_model
    model = Prophet(
        # Fixed: this used to receive params["changepoint_prior_scale"], so
        # the sampled "changepoint_range" was never used by the model.
        changepoint_range=params["changepoint_range"],
        n_changepoints=params["n_changepoints"],
        changepoint_prior_scale=params["changepoint_prior_scale"],
        seasonality_prior_scale=params["seasonality_prior_scale"],
        yearly_seasonality=False,
        weekly_seasonality=True,
        daily_seasonality=True,
        growth="logistic",
        seasonality_mode="additive",
    )

    # Each custom seasonality gets its own sampled Fourier order and prior.
    seasonal_periods = (
        ("yearly", 365.25),
        ("monthly", 30.5),
        ("weekly", 7),
        ("quaterly", 365.25 / 4),
    )
    for season, period in seasonal_periods:
        model.add_seasonality(
            name=season,
            period=period,
            fourier_order=params[f"{season}_fourier"],
            prior_scale=params[f"{season}_prior"],
        )

    train["cap"] = cap
    train["floor"] = floor
    model.fit(train)

    future = model.make_future_dataframe(periods=144, freq="d")
    future["cap"] = cap
    future["floor"] = floor
    forecast = model.predict(future)

    # NOTE(review): 144 periods are forecast but only the last 7 rows are
    # scored -- confirm these rows line up with ``valid``.
    valid_forecast = forecast.tail(7)
    rmse = mean_squared_error(valid.y, valid_forecast.yhat, squared=False)
    return rmse
def objective(self, trial:optuna.Trial):
    """Optuna objective: sample values for every spec, run a trial, return its result.

    Each value of ``self.optuna_params`` is a spec string in one of these
    forms:

    * ``uniform(low,high)``
    * ``loguniform(low,high)``
    * ``int(low,high)``
    * ``discrete_uniform(low,high,q)``
    * ``categorical([...])`` where the bracketed part is a JSON list

    Specs that start with none of these prefixes are skipped.  Numeric
    samples are stored as strings, categorical samples as returned.

    Args:
        trial (optuna.Trial): trial object of optuna

    Returns:
        ``bucket["Result"]`` of the executed trial config, or ``0`` when the
        bucket has no ``"Result"`` entry.

    Raises:
        ValueError: If a spec has a known prefix but does not match its
            pattern.
    """
    logger = ErmineLogger.get_instance()
    logger.debug("objective")
    optuna_dict = {}
    template = self.template
    optuna_params = self.optuna_params
    logger.debug(optuna_params)

    def _match_spec(pattern, spec):
        # Fail with a clear message instead of an AttributeError on None.
        matchobj = re.match(pattern, spec)
        if matchobj is None:
            raise ValueError("Malformed optuna parameter spec: " + spec)
        return matchobj

    # NOTE(review): the spec string ``p`` (not ``p_key``) is used as the
    # Optuna parameter name, so two keys with an identical spec share one
    # sampled value -- confirm this is intended.
    for p_key in optuna_params:
        p = optuna_params[p_key]
        logger.debug("p in optuna key "+ p_key + " , " + "val "+ p)

        if p.startswith("uniform"):
            matchobj = _match_spec(r"uniform\((.*),(.*)\)", p)
            low = float(matchobj.group(1))
            high = float(matchobj.group(2))
            v = trial.suggest_uniform(p, low, high)
            optuna_dict[p_key] = str(v)
        elif p.startswith("loguniform"):
            matchobj = _match_spec(r"loguniform\((.*),(.*)\)", p)
            low = float(matchobj.group(1))
            high = float(matchobj.group(2))
            v = trial.suggest_loguniform(p, low, high)
            optuna_dict[p_key] = str(v)
        elif p.startswith("categorical"):
            matchobj = _match_spec(r"categorical\((\[.*\])\)", p)
            json_array = json.loads(matchobj.group(1))
            v = trial.suggest_categorical(p, json_array)
            optuna_dict[p_key] = v
        elif p.startswith("int"):
            matchobj = _match_spec(r"int\((.*),(.*)\)", p)
            # Fixed: this branch used to call suggest_loguniform with float
            # bounds, so "int" specs produced log-uniform floats.
            low = int(float(matchobj.group(1)))
            high = int(float(matchobj.group(2)))
            v = trial.suggest_int(p, low, high)
            optuna_dict[p_key] = str(v)
        elif p.startswith("discrete_uniform"):
            matchobj = _match_spec(r"discrete_uniform\((.*),(.*),(.*)\)", p)
            logger.debug(matchobj)
            low = float(matchobj.group(1))
            high = float(matchobj.group(2))
            q = float(matchobj.group(3))
            v = trial.suggest_discrete_uniform(p, low, high, q)
            optuna_dict[p_key] = str(v)

    self.logger.debug("optuna trial values")
    self.logger.debug(optuna_dict)
    trial_config = self.generate_trial_config(template, optuna_dict)
    self.logger.debug(trial_config)

    bucket:WorkingBucket = self.execute(trial_config)
    if "Result" in bucket:
        return bucket["Result"]
    return 0