Exemplo n.º 1
0
    def setUpClass(cls):
        """Populate ``cls.data`` and ``cls.train_params`` for the tests.

        The CSV fixture is loaded with every column except the last used as
        features and the ``"income"`` column used as target.  One model type
        is then drawn at random from the binary-classification entries of
        ``ModelsRegistry`` and its training configuration is assembled.
        """
        # Seeding with None re-seeds NumPy's global generator, so the model
        # picked below is not fixed from run to run.
        np.random.seed(None)
        frame = pd.read_csv(
            "tests/data/adult_missing_values_missing_target_500rows.csv")
        feature_columns = frame.columns[:-1]
        cls.data = {
            "train": {"X": frame[feature_columns], "y": frame["income"]},
        }

        task_registry = ModelsRegistry.registry[BINARY_CLASSIFICATION]
        candidates = list(task_registry.keys())
        # The first element of a random permutation is the drawn model type.
        chosen_type = np.random.permutation(candidates)[0]
        chosen = task_registry[chosen_type]

        hyper_params = RandomParameters.get(chosen["params"])
        required, extras = chosen["required_preprocessing"], chosen["additional"]
        preprocessing = PreprocessingTuner.get(
            required, cls.data, BINARY_CLASSIFICATION)

        # A single shuffled 80/20 split is used here; a k-fold setup
        # ("validation_type": "kfold", "k_folds": 5) is the alternative.
        validation = {
            "validation_type": "split",
            "train_ratio": 0.8,
            "shuffle": True,
        }
        learner = {
            "model_type": chosen["class"].algorithm_short_name,
            **hyper_params,
        }

        cls.train_params = {
            "additional": extras,
            "preprocessing": preprocessing,
            "validation": validation,
            "learner": learner,
        }
Exemplo n.º 2
0
    def _get_model_params(self, model_type, X, y, models_cnt):
        """Assemble the complete parameter dictionary for one model.

        Args:
            model_type: Key of the algorithm inside
                ``AlgorithmsRegistry.registry[self._ml_task]``.
            X: Training features, handed to ``PreprocessingTuner.get``.
            y: Training target, handed to ``PreprocessingTuner.get``; for
                multiclass tasks its distinct non-null values are counted.
            models_cnt: Added to ``self._seed`` and passed as the second
                argument of ``RandomParameters.get``.

        Returns:
            A dict with the keys ``"additional"``, ``"preprocessing"``,
            ``"validation"``, ``"learner"`` and ``"ml_task"``.
        """
        task = self._ml_task
        entry = AlgorithmsRegistry.registry[task][model_type]
        hyper_params = RandomParameters.get(entry["params"],
                                            models_cnt + self._seed)
        required, extras = entry["required_preprocessing"], entry["additional"]
        train_data = {"train": {"X": X, "y": y}}
        preprocessing = PreprocessingTuner.get(required, train_data, task)

        learner = {
            "model_type": entry["class"].algorithm_short_name,
            "ml_task": task,
            **hyper_params,
        }
        if task == MULTICLASS_CLASSIFICATION:
            # Null targets are excluded before counting the classes.
            learner["num_class"] = len(np.unique(y[~pd.isnull(y)]))

        return {
            "additional": extras,
            "preprocessing": preprocessing,
            "validation": self._validation,
            "learner": learner,
            "ml_task": task,
        }
Exemplo n.º 3
0
 def _get_model_params(self, model_type, X, y):
     """Assemble the parameter dictionary for a binary-classification model.

     Args:
         model_type: Key of the algorithm inside
             ``ModelsRegistry.registry[BINARY_CLASSIFICATION]``.
         X: Training features, handed to ``PreprocessingTuner.get``.
         y: Training target, handed to ``PreprocessingTuner.get``.

     Returns:
         A dict with the keys ``"additional"``, ``"preprocessing"``,
         ``"validation"`` and ``"learner"``.
     """
     entry = ModelsRegistry.registry[BINARY_CLASSIFICATION][model_type]
     hyper_params = RandomParameters.get(entry["params"])
     required, extras = entry["required_preprocessing"], entry["additional"]
     train_data = {"train": {"X": X, "y": y}}
     preprocessing = PreprocessingTuner.get(
         required, train_data, BINARY_CLASSIFICATION
     )
     learner = {
         "model_type": entry["class"].algorithm_short_name,
         **hyper_params,
     }
     return {
         "additional": extras,
         "preprocessing": preprocessing,
         "validation": self._validation,
         "learner": learner,
     }
Exemplo n.º 4
0
    def _get_model_params(self, model_type, seed, params_type="random"):
        """Build the parameter dictionary for one model of ``model_type``.

        Args:
            model_type: Key of the algorithm inside
                ``AlgorithmsRegistry.registry[self._ml_task]``.
            seed: With ``params_type="default"`` it is stored as the
                ``"seed"`` hyper-parameter; otherwise ``seed + self._seed``
                is passed to ``RandomParameters.get``.
            params_type: ``"default"`` starts from the registry entry's
                ``"default_params"``; any other value obtains the
                hyper-parameters from ``RandomParameters.get``.

        Returns:
            A dict with the keys ``"additional"``, ``"preprocessing"``,
            ``"validation_strategy"``, ``"learner"``, ``"ml_task"`` and
            ``"explain_level"``, or ``None`` when no hyper-parameters were
            obtained.
        """
        model_info = AlgorithmsRegistry.registry[self._ml_task][model_type]

        if params_type == "default":
            # Work on a copy: writing "seed" into the registry's own dict
            # would modify state shared by every later lookup of this entry.
            model_params = dict(model_info["default_params"])
            model_params["seed"] = seed
        else:
            model_params = RandomParameters.get(model_info["params"],
                                                seed + self._seed)
        if model_params is None:
            return None

        required_preprocessing = model_info["required_preprocessing"]
        model_additional = model_info["additional"]
        preprocessing_params = PreprocessingTuner.get(required_preprocessing,
                                                      self._data_info,
                                                      self._ml_task)

        model_params = {
            "additional": model_additional,
            "preprocessing": preprocessing_params,
            "validation_strategy": self._validation_strategy,
            "learner": {
                "model_type": model_info["class"].algorithm_short_name,
                "ml_task": self._ml_task,
                "n_jobs": self._n_jobs,
                **model_params,
            },
        }

        # Only forwarded to the learner when the data info provides it.
        num_class = self._data_info.get("num_class")
        if num_class is not None:
            model_params["learner"]["num_class"] = num_class

        model_params["ml_task"] = self._ml_task
        model_params["explain_level"] = self._explain_level

        return model_params
Exemplo n.º 5
0
    def _get_model_params(self, model_type, seed, params_type="random"):
        """Build the parameter dictionary for one model of ``model_type``.

        Args:
            model_type: Key of the algorithm inside
                ``AlgorithmsRegistry.registry[self._ml_task]``.
            seed: With ``params_type="default"`` it is stored as the
                ``"seed"`` hyper-parameter; otherwise ``seed + self._seed``
                is passed to ``RandomParameters.get``.
            params_type: ``"default"`` starts from the registry entry's
                ``"default_params"``; any other value obtains the
                hyper-parameters from ``RandomParameters.get``.

        Returns:
            A dict with the keys ``"additional"``, ``"preprocessing"``,
            ``"validation_strategy"``, ``"learner"``,
            ``"automl_random_state"``, ``"ml_task"`` and ``"explain_level"``,
            or ``None`` when no hyper-parameters were obtained.
        """
        model_info = AlgorithmsRegistry.registry[self._ml_task][model_type]

        if params_type == "default":
            # Work on a copy: the seed and the metric settings written below
            # would otherwise be stored in the registry's own dict, which is
            # shared by every later lookup of this entry.
            model_params = dict(model_info["default_params"])
            model_params["seed"] = seed
        else:
            model_params = RandomParameters.get(model_info["params"], seed + self._seed)
        if model_params is None:
            return None

        # Set the evaluation metric using the key(s) each algorithm expects.
        algorithm = model_info["class"].algorithm_short_name
        if algorithm == "Xgboost":
            model_params["eval_metric"] = xgboost_eval_metric(
                self._ml_task, self._eval_metric
            )
        elif algorithm == "LightGBM":
            metric, custom_metric = lightgbm_eval_metric(
                self._ml_task, self._eval_metric
            )
            model_params["metric"] = metric
            model_params["custom_eval_metric_name"] = custom_metric
        elif algorithm == "CatBoost":
            model_params["eval_metric"] = catboost_eval_metric(
                self._ml_task, self._eval_metric
            )
        elif algorithm in ("Random Forest", "Extra Trees"):
            model_params["eval_metric_name"] = self._eval_metric
            model_params["ml_task"] = self._ml_task

        required_preprocessing = model_info["required_preprocessing"]
        model_additional = model_info["additional"]
        preprocessing_params = PreprocessingTuner.get(
            required_preprocessing, self._data_info, self._ml_task
        )

        model_params = {
            "additional": model_additional,
            "preprocessing": preprocessing_params,
            "validation_strategy": self._validation_strategy,
            "learner": {
                "model_type": algorithm,
                "ml_task": self._ml_task,
                "n_jobs": self._n_jobs,
                **model_params,
            },
            "automl_random_state": self._seed,
        }

        # Only forwarded to the learner when the data info provides it.
        num_class = self._data_info.get("num_class")
        if num_class is not None:
            model_params["learner"]["num_class"] = num_class

        model_params["ml_task"] = self._ml_task
        model_params["explain_level"] = self._explain_level

        return model_params