def __init__(
    self,
    ml_task,
    X_train,
    y_train,
    sample_weight,
    X_validation,
    y_validation,
    sample_weight_validation,
    eval_metric,
    cat_features_indices,
    n_jobs,
    random_state,
):
    """Prepare LightGBM train/validation ``Dataset`` objects and tuning defaults.

    Stores the raw train/validation data, wraps it in ``lgb.Dataset``
    (DataFrames are converted to numpy first), resolves the LightGBM metric
    name for the requested eval metric, and selects a custom eval callback
    for metrics LightGBM does not provide natively.

    Args:
        ml_task: Task identifier (e.g. ``MULTICLASS_CLASSIFICATION``).
        X_train, y_train, sample_weight: Training data, targets, weights.
        X_validation, y_validation, sample_weight_validation: Validation split.
        eval_metric: Metric object; its ``name`` drives metric selection.
        cat_features_indices: Indices of categorical feature columns.
        n_jobs: Thread count; ``-1`` maps to LightGBM's "default" (0).
        random_state: Seed passed through to LightGBM.
    """
    self.X_train = X_train
    self.y_train = y_train
    self.sample_weight = sample_weight
    self.X_validation = X_validation
    self.y_validation = y_validation
    self.sample_weight_validation = sample_weight_validation

    # LightGBM works on raw numpy arrays; unwrap DataFrames once here.
    self.dtrain = lgb.Dataset(
        self.X_train.to_numpy()
        if isinstance(self.X_train, pd.DataFrame)
        else self.X_train,
        label=self.y_train,
        weight=self.sample_weight,
    )
    self.dvalid = lgb.Dataset(
        self.X_validation.to_numpy()
        if isinstance(self.X_validation, pd.DataFrame)
        else self.X_validation,
        label=self.y_validation,
        weight=self.sample_weight_validation,
    )

    self.cat_features_indices = cat_features_indices
    self.eval_metric = eval_metric
    self.learning_rate = 0.025
    self.rounds = 1000
    self.early_stopping_rounds = 50
    self.seed = random_state

    # LightGBM interprets n_jobs=0 as "use its default thread count";
    # map the sklearn-style -1 ("all cores") onto that convention.
    self.n_jobs = 0 if n_jobs == -1 else n_jobs

    self.eval_metric_name, self.custom_eval_metric_name = lightgbm_eval_metric(
        ml_task, eval_metric.name
    )

    # Metrics LightGBM does not implement natively are evaluated through a
    # custom callback; for every other metric this stays None.
    _custom_metrics = {
        "r2": lightgbm_eval_metric_r2,
        "spearman": lightgbm_eval_metric_spearman,
        "pearson": lightgbm_eval_metric_pearson,
        "f1": lightgbm_eval_metric_f1,
        "average_precision": lightgbm_eval_metric_average_precision,
        "accuracy": lightgbm_eval_metric_accuracy,
    }
    self.custom_eval_metric = _custom_metrics.get(self.eval_metric.name)

    # num_class is only meaningful for multiclass objectives.
    self.num_class = (
        len(np.unique(y_train)) if ml_task == MULTICLASS_CLASSIFICATION else None
    )
    self.objective = lightgbm_objective(ml_task, eval_metric.name)
def _get_model_params(self, model_type, seed, params_type="random"):
    """Build the full parameter payload for one learner of ``model_type``.

    Looks the model up in ``AlgorithmsRegistry`` for the current ML task,
    draws either the default or a randomized hyperparameter set, injects the
    task-appropriate eval metric, and wraps everything together with the
    preprocessing and validation configuration.

    Args:
        model_type: Registry key of the algorithm (e.g. ``"Xgboost"``).
        seed: Per-model seed; combined with ``self._seed`` for random draws.
        params_type: ``"default"`` for the registry defaults, anything else
            for a random sample from the algorithm's parameter space.

    Returns:
        dict with ``learner``/``preprocessing``/``validation_strategy`` keys,
        or None when no parameters could be drawn.
    """
    model_info = AlgorithmsRegistry.registry[self._ml_task][model_type]

    if params_type == "default":
        # Copy before mutating: "default_params" is a dict shared inside the
        # registry, and writing the seed into it directly would leak state
        # into every subsequent lookup of this algorithm.
        model_params = dict(model_info["default_params"])
        model_params["seed"] = seed
    else:
        model_params = RandomParameters.get(model_info["params"], seed + self._seed)
    if model_params is None:
        return None

    # Inject the eval metric in whatever form this algorithm expects.
    short_name = model_info["class"].algorithm_short_name
    if short_name == "Xgboost":
        model_params["eval_metric"] = xgboost_eval_metric(
            self._ml_task, self._eval_metric
        )
    if short_name == "LightGBM":
        metric, custom_metric = lightgbm_eval_metric(
            self._ml_task, self._eval_metric
        )
        model_params["metric"] = metric
        model_params["custom_eval_metric_name"] = custom_metric
    if short_name == "CatBoost":
        model_params["eval_metric"] = catboost_eval_metric(
            self._ml_task, self._eval_metric
        )
    elif short_name in [
        "Random Forest",
        "Extra Trees",
    ]:
        # Tree ensembles take the metric name directly plus the task.
        model_params["eval_metric_name"] = self._eval_metric
        model_params["ml_task"] = self._ml_task

    required_preprocessing = model_info["required_preprocessing"]
    model_additional = model_info["additional"]
    preprocessing_params = PreprocessingTuner.get(
        required_preprocessing, self._data_info, self._ml_task
    )

    model_params = {
        "additional": model_additional,
        "preprocessing": preprocessing_params,
        "validation_strategy": self._validation_strategy,
        "learner": {
            "model_type": short_name,
            "ml_task": self._ml_task,
            "n_jobs": self._n_jobs,
            **model_params,
        },
        "automl_random_state": self._seed,
    }

    # Multiclass learners need the class count threaded through.
    if self._data_info.get("num_class") is not None:
        model_params["learner"]["num_class"] = self._data_info.get("num_class")

    model_params["ml_task"] = self._ml_task
    model_params["explain_level"] = self._explain_level

    return model_params