Example No. 1
from typing import List, Tuple

import lightgbm as lgb
import pandas as pd
from optuna.integration.lightgbm import LightGBMTunerCV


def lgb_cv_tuning(
    dataset_x: pd.DataFrame,
    dataset_y: pd.DataFrame,
    cv_index: List[Tuple[pd.RangeIndex, pd.RangeIndex]],
) -> Tuple[List[lgb.Booster], dict]:
    # Wrap the features and labels in a LightGBM Dataset
    train_dataset = lgb.Dataset(data=dataset_x, label=dataset_y)
    lgbtuner = LightGBMTunerCV(
        params={
            "objective": "regression",
            "metric": "rmse",
        },
        train_set=train_dataset,
        folds=cv_index,  # explicit folds take precedence, so nfold is ignored
        nfold=5,
        verbose_eval=False,
        num_boost_round=10000,
        early_stopping_rounds=100,
        return_cvbooster=True,
    )
    # Stepwise search over the LightGBM hyper-parameters
    lgbtuner.run()
    # get_best_booster() returns a CVBooster; .boosters holds one Booster per fold
    models = lgbtuner.get_best_booster().boosters
    best_params = lgbtuner.best_params
    print("  Params: ")
    for key, value in best_params.items():
        print("    {}: {}".format(key, value))
    return models, best_params
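
A minimal usage sketch (not part of the original example): it assumes scikit-learn's KFold to build the fold pairs and a small synthetic regression dataset; KFold yields numpy index arrays, which lightgbm.cv accepts in place of the RangeIndex objects named in the annotation.

import numpy as np
import pandas as pd
from sklearn.model_selection import KFold

# Hypothetical toy data, only to illustrate the call
X = pd.DataFrame(np.random.rand(500, 5), columns=[f"f{i}" for i in range(5)])
y = pd.DataFrame({"target": np.random.rand(500)})

# One (train_index, valid_index) pair per fold
cv_index = list(KFold(n_splits=5, shuffle=True, random_state=0).split(X))

models, best_params = lgb_cv_tuning(X, y, cv_index)
print(len(models), "boosters trained; best params:", best_params)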
Example No. 2
    def optimize(self, dtrain: lgbDataset, deval: lgbDataset):
        # Define the base parameters for each supported objective
        if self.objective == "binary":
            params: Dict = {"objective": self.objective}
        elif self.objective == "multiclass":
            params: Dict = {
                "objective": self.objective,
                "metric": "multi_logloss",
            }
        elif self.objective == "regression":
            params: Dict = {"objective": self.objective, "metric": "rmse"}
        else:
            raise ValueError(
                "objective must be 'binary', 'multiclass' or 'regression'"
            )

        if self.verbose:
            params["verbosity"] = 1
        else:
            params["verbosity"] = -1

        if self.objective != "regression":
            params["is_unbalance"] = self.is_unbalance

        if self.objective == "multiclass":
            params["num_class"] = self.num_class

        # Reformat the data for LightGBM cross validation method
        train_set = lgb.Dataset(
            data=pd.concat([dtrain.data, deval.data]).reset_index(drop=True),
            label=pd.concat([dtrain.label,
                             deval.label]).reset_index(drop=True),
            categorical_feature=dtrain.categorical_feature,
            free_raw_data=False,
        )
        # A single "fold" that reproduces the original train/validation split
        train_index = range(len(dtrain.data))
        valid_index = range(len(dtrain.data), len(train_set.data))

        # Run the hyper-parameter tuning
        self.tuner = LightGBMTunerCV(
            params=params,
            train_set=train_set,
            folds=[(train_index, valid_index)],
            verbose_eval=False,
            num_boost_round=1000,
            early_stopping_rounds=50,
        )

        self.tuner.run()

        self.best = self.tuner.best_params
        # since n_estimators is not among the params that Optuna optimizes we
        # need to add it manually. We add a high value since it will be used
        # with early_stopping_rounds
        self.best["n_estimators"] = 1000  # type: ignore
# Imports assumed for this snippet; lgbDataset is taken here to be an alias for lightgbm.Dataset
from typing import Any, Dict

import lightgbm as lgb
import pandas as pd
from lightgbm import Dataset as lgbDataset
from optuna.integration.lightgbm import LightGBMTunerCV


class LGBOptimizerOptuna(object):
    def __init__(
        self,
        objective: str = "binary",
        verbose: bool = False,
    ):

        self.objective = objective
        self.verbose = verbose
        self.best: Dict[str, Any] = {}  # Best hyper-parameters

    def optimize(self, dtrain: lgbDataset, deval: lgbDataset):
        # Define the base parameters
        params: Dict = {"objective": self.objective}  # , "metric": "rmse"}

        if self.verbose:
            params["verbosity"] = 1
        else:
            params["verbosity"] = -1

        # Reformat the data for LightGBM cross validation method
        train_set = lgb.Dataset(
            data=pd.concat([dtrain.data, deval.data]).reset_index(drop=True),
            label=pd.concat([dtrain.label,
                             deval.label]).reset_index(drop=True),
            categorical_feature=dtrain.categorical_feature,
            free_raw_data=False,
        )
        train_index = range(len(dtrain.data))
        valid_index = range(len(dtrain.data), len(train_set.data))

        # Run the hyper-parameter tuning
        self.tuner = LightGBMTunerCV(
            params=params,
            train_set=train_set,
            folds=[(train_index, valid_index)],
            verbose_eval=False,
            num_boost_round=1000,
            early_stopping_rounds=50,
        )

        self.tuner.run()

        self.best = self.tuner.best_params
        # since n_estimators is not among the params that Optuna optimizes we
        # need to add it manually. We add a high value since it will be used
        # with early_stopping_rounds
        self.best["n_estimators"] = 1000  # type: ignore