def test__handling_alias_parameters():  # type: () -> None
    params = {"reg_alpha": 0.1}
    _handling_alias_parameters(params)
    assert "reg_alpha" not in params
    assert "lambda_l1" in params
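# Illustrative sketch only: a minimal helper of the kind the tests in this
# section exercise. The alias table below is a small assumed subset; the real
# _handling_alias_parameters covers many more LightGBM parameter aliases.
ALIAS_TO_CANONICAL = {
    "reg_alpha": "lambda_l1",
    "reg_lambda": "lambda_l2",
    "eta": "learning_rate",
    "min_data": "min_data_in_leaf",
}


def _handling_alias_parameters_sketch(params):
    # type: (dict) -> None
    """Rewrite alias keys to their canonical LightGBM names in place."""
    for alias, canonical in ALIAS_TO_CANONICAL.items():
        if alias in params:
            params[canonical] = params.pop(alias)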
def run(self) -> None:
    """Perform the hyperparameter-tuning with given parameters."""
    verbosity = self.auto_options["verbosity"]
    if verbosity is not None:
        if verbosity > 1:
            optuna.logging.set_verbosity(optuna.logging.DEBUG)
        elif verbosity == 1:
            optuna.logging.set_verbosity(optuna.logging.INFO)
        elif verbosity == 0:
            optuna.logging.set_verbosity(optuna.logging.WARNING)
        else:
            optuna.logging.set_verbosity(optuna.logging.CRITICAL)

    # Handling aliases.
    _handling_alias_parameters(self.lgbm_params)

    # Sampling.
    self.sample_train_set()

    self.tune_feature_fraction()
    self.tune_num_leaves()
    self.tune_bagging()
    self.tune_feature_fraction_stage2()
    self.tune_regularization_factors()
    self.tune_min_data_in_leaf()
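# Usage sketch: run() is normally driven through a tuner instance. This
# assumes the optuna.integration.lightgbm entry point; the exact import path
# and constructor signature vary across Optuna/LightGBM versions.
import optuna.integration.lightgbm as lgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0)

dtrain = lgb.Dataset(X_train, label=y_train)
dvalid = lgb.Dataset(X_valid, label=y_valid, reference=dtrain)

params = {"objective": "binary", "metric": "binary_logloss"}
tuner = lgb.LightGBMTuner(params, dtrain, valid_sets=[dvalid])
tuner.run()  # drives the stepwise tune_* sequence shown above

print(tuner.best_params)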
def test_handling_alias_parameter() -> None:
    params = {
        "num_boost_round": 5,
        "early_stopping_rounds": 2,
        "min_data": 0.2,
    }
    _handling_alias_parameters(params)

    assert "min_data" not in params
    assert "min_data_in_leaf" in params
    assert params["min_data_in_leaf"] == 0.2
def test_handling_alias_parameter_with_user_supplied_param() -> None:
    params = {
        "num_boost_round": 5,
        "early_stopping_rounds": 2,
        "eta": 0.5,
    }
    _handling_alias_parameters(params)

    assert "eta" not in params
    assert "learning_rate" in params
    assert params["learning_rate"] == 0.5
def run(self) -> None:
    """Perform the hyperparameter-tuning with given parameters."""
    # Suppress log messages.
    if self.auto_options["verbosity"] == 0:
        optuna.logging.disable_default_handler()
        self.lgbm_params["verbose"] = -1
        self.lgbm_kwargs["verbose_eval"] = False

    # Handling aliases.
    _handling_alias_parameters(self.lgbm_params)

    # Sampling.
    self.sample_train_set()

    self.tune_feature_fraction()
    self.tune_num_leaves()
    self.tune_bagging()
    self.tune_feature_fraction_stage2()
    self.tune_regularization_factors()
    self.tune_min_data_in_leaf()
def run(self) -> None:
    verbosity = self.auto_options["verbosity"]
    if verbosity is not None:
        if verbosity > 1:
            optuna.logging.set_verbosity(optuna.logging.DEBUG)
        elif verbosity == 1:
            optuna.logging.set_verbosity(optuna.logging.INFO)
        elif verbosity == 0:
            optuna.logging.set_verbosity(optuna.logging.WARNING)
        else:
            optuna.logging.set_verbosity(optuna.logging.CRITICAL)

    # Handling aliases.
    _handling_alias_parameters(self.lgbm_params)

    # Sampling.
    self.sample_train_set()

    self.tune_feature_fraction(self.n_trials_config[0])
    self.tune_num_leaves(self.n_trials_config[1])
    self.tune_bagging(self.n_trials_config[2])
    self.tune_feature_fraction_stage2(self.n_trials_config[3])
    self.tune_regularization_factors(self.n_trials_config[4])
    self.tune_min_data_in_leaf()
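# Illustrative sketch (not the tuner's implementation) of the stepwise
# strategy the run() above follows: each step searches one parameter group
# with plain Optuna while every previously tuned value stays fixed.
# cv_score() is a hypothetical helper standing in for any cross-validated
# evaluation; the per-step trial counts are illustrative.
import optuna

best = {"objective": "binary", "metric": "binary_logloss"}


def tune_step(name, suggest, n_trials):
    def objective(trial):
        params = dict(best)
        params[name] = suggest(trial)
        return cv_score(params)  # hypothetical: returns a CV metric to minimize

    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=n_trials)
    best[name] = study.best_params[name]


# Mirrors the fixed ordering of the tune_* calls above.
tune_step("feature_fraction", lambda t: t.suggest_float("feature_fraction", 0.4, 1.0), 7)
tune_step("num_leaves", lambda t: t.suggest_int("num_leaves", 2, 256), 20)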
def fit(
    self,
    X: TwoDimArrayLikeType,
    y: OneDimArrayLikeType,
    sample_weight: Optional[OneDimArrayLikeType] = None,
    group: Optional[OneDimArrayLikeType] = None,
    eval_metric: Optional[Union[Callable, List[str], str]] = None,
    early_stopping_rounds: Optional[int] = 10,
    feature_name: Union[List[str], str] = "auto",
    categorical_feature: Union[List[int], List[str], str] = "auto",
    callbacks: Optional[List[Callable]] = None,
    init_model: Optional[Union[lgb.Booster, lgb.LGBMModel, str]] = None,
    groups: Optional[OneDimArrayLikeType] = None,
    optuna_callbacks: Optional[List[Callable]] = None,
    **fit_params: Any
) -> "LGBMModel":
    """Fit the model according to the given training data.

    Parameters
    ----------
    X
        Training data.

    y
        Target.

    sample_weight
        Weights of training data.

    group
        Group data of training data.

    eval_metric
        Evaluation metric. See
        https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric.

    early_stopping_rounds
        Used to activate early stopping. The model will train until the
        validation score stops improving.

    feature_name
        Feature names. If 'auto' and data is pandas DataFrame, data columns
        names are used.

    categorical_feature
        Categorical features. If list of int, interpreted as indices. If
        list of strings, interpreted as feature names. If 'auto' and data
        is pandas DataFrame, pandas categorical columns are used. All
        values in categorical features should be less than int32 max value
        (2147483647). Large values could be memory consuming. Consider
        using consecutive integers starting from zero. All negative values
        in categorical features will be treated as missing values.

    callbacks
        List of callback functions that are applied at each iteration.

    init_model
        Filename of LightGBM model, Booster instance or LGBMModel instance
        used for continue training.

    groups
        Group labels for the samples used while splitting the dataset into
        train/test set. If `group` is not None, this parameter is ignored.

    optuna_callbacks
        List of Optuna callback functions that are invoked at the end of
        each trial.

    **fit_params
        Always ignored. This parameter exists for compatibility.

    Returns
    -------
    self
        Return self.
""" logger = logging.getLogger(__name__) X, y, sample_weight = check_fit_params( X, y, sample_weight=sample_weight, accept_sparse=True, ensure_min_samples=2, estimator=self, force_all_finite=False, ) # See https://github.com/microsoft/LightGBM/issues/2319 if group is None and groups is not None: groups, _ = pd.factorize(groups) indices = np.argsort(groups) X = _safe_indexing(X, indices) y = _safe_indexing(y, indices) sample_weight = _safe_indexing(sample_weight, indices) groups = _safe_indexing(groups, indices) _, group = np.unique(groups, return_counts=True) n_samples, self._n_features = X.shape # type: Tuple[int, int] self._n_features_in = self._n_features is_classifier = self._estimator_type == "classifier" cv = check_cv(self.cv, y, classifier=is_classifier) seed = self._get_random_state() for key, value in fit_params.items(): logger.warning("{}={} will be ignored.".format(key, value)) params = self.get_params() alias._handling_alias_parameters(params) if ( not any( verbose_alias in params for verbose_alias in ("verbose", "verbosity") ) and self.silent ): params["verbose"] = -1 for attr in ( "class_weight", "cv", "enable_pruning", "importance_type", "n_estimators", "n_trials", "param_distributions", "refit", "silent", "study", "timeout", "model_dir", ): params.pop(attr, None) params["objective"] = self._get_objective() params["random_state"] = seed if self._n_classes is not None and self._n_classes > 2: params["num_classes"] = self._n_classes if callable(eval_metric): params["metric"] = "None" feval = _EvalFunctionWrapper(eval_metric) args = [p.name for p in signature(eval_metric).parameters.values()] if len(args) > 3: eval_name, _, is_higher_better = eval_metric( y, y, sample_weight, group ) elif len(args) > 2: eval_name, _, is_higher_better = eval_metric( y, y, sample_weight ) else: eval_name, _, is_higher_better = eval_metric(y, y) elif isinstance(eval_metric, list): raise ValueError("eval_metric is not allowed to be a list.") else: if eval_metric is None: params["metric"] = OBJECTIVE2METRIC[params["objective"]] else: params["metric"] = eval_metric feval = None eval_name = params["metric"] is_higher_better = _is_higher_better(params["metric"]) fobj = ( _ObjectiveFunctionWrapper(self.objective) if callable(self.objective) else None ) init_model = ( init_model.booster_ if isinstance(init_model, lgb.LGBMModel) else init_model ) self.study_ = self._make_study(is_higher_better) dataset = lgb.Dataset( X, label=y, group=group, weight=sample_weight, feature_name=feature_name, categorical_feature=categorical_feature, ) model_dir = self._get_model_dir() weights = np.array( [ np.sum(sample_weight[train]) for train, _ in cv.split(X, y, groups=groups) ] ) objective = _Objective( params, dataset, eval_name, is_higher_better, n_samples, model_dir, callbacks=callbacks, cv=cv, early_stopping_rounds=early_stopping_rounds, enable_pruning=self.enable_pruning, feval=feval, fobj=fobj, init_model=init_model, n_estimators=self.n_estimators, param_distributions=self.param_distributions, ) logger.info("Searching the best hyperparameters...") start_time = time.perf_counter() self.study_.optimize( objective, callbacks=optuna_callbacks, catch=(), n_trials=self.n_trials, timeout=self.timeout, ) elapsed_time = time.perf_counter() - start_time best_iteration = self.study_.best_trial.user_attrs["best_iteration"] self._best_iteration = ( None if early_stopping_rounds is None else best_iteration ) self._best_score = self.study_.best_value self._objective = params["objective"] self.best_params_ = {**params, 
**self.study_.best_params} self.n_splits_ = cv.get_n_splits(X, y, groups=groups) logger.info( "Finished hyperparemeter search! " "(elapsed time: {:.3f} sec.) " "The best_iteration is {}.".format(elapsed_time, best_iteration) ) logger.info("Making booster(s)...") start_time = time.perf_counter() self._Booster = self._make_booster( self.best_params_, dataset, best_iteration, self.best_index_, weights, fobj=fobj, feature_name=feature_name, categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model, ) elapsed_time = time.perf_counter() - start_time logger.info( "Finished making booster(s)! " "(elapsed time: {:.3f} sec.)".format(elapsed_time) ) if self.refit: self.refit_time_ = elapsed_time return self
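# Usage sketch for the fit() above. `TunedLGBMClassifier` is a hypothetical
# name for a concrete classifier subclass exposing this fit(); substitute the
# actual estimator class. The callable-metric contract below is the one fit()
# checks: the metric returns (eval_name, value, is_higher_better).
import numpy as np
from sklearn.datasets import load_breast_cancer


def zero_one_error(y_true, y_pred):
    return "zero_one_error", float(np.mean(y_true != (y_pred > 0.5))), False


X, y = load_breast_cancer(return_X_y=True)

clf = TunedLGBMClassifier(n_trials=50, random_state=0)  # hypothetical class
clf.fit(X, y, eval_metric=zero_one_error, early_stopping_rounds=10)

print(clf.best_params_)
print(clf.n_splits_)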