def train(self, data):
    """Fit one learner per train/validation split of ``data``.

    For every split produced by the validation step a fresh preprocessing
    step and a fresh learner are created; the learner is fitted iteratively
    while callbacks drive per-iteration bookkeeping and early stopping.
    Total wall-clock time is stored in ``self.train_time``.
    """
    start_time = time.time()
    log.debug("IterativeLearner.train")
    np.random.seed(self.learner_params["seed"])

    # Rows without a target value cannot be used for supervised training.
    data = PreprocessingExcludeMissingValues.remove_rows_without_target(data)
    self.validation = ValidationStep(self.validation_params, data)

    for train_data, validation_data in self.validation.split():
        # Preprocessing is (re)fitted for every validation split.
        preprocessing = PreprocessingStep(self.preprocessing_params)
        self.preprocessings.append(preprocessing)
        train_data, _ = preprocessing.run(train_data)
        validation_data = preprocessing.transform(validation_data)

        learner = LearnerFactory.get_learner(self.learner_params)
        self.learners.append(learner)

        self.callbacks.add_and_set_learner(learner)
        self.callbacks.on_learner_train_start()

        for iteration in range(learner.max_iters):
            self.callbacks.on_iteration_start()
            learner.fit(train_data.get("X"), train_data.get("y"))
            # NOTE(review): a target postprocessing hook could go here.
            self.callbacks.on_iteration_end(
                {"iter_cnt": iteration},
                self.predictions(learner, train_data, validation_data),
            )
            # Callbacks (e.g. early stopping) may request a stop.
            if learner.stop_training:
                break
            learner.update({"step": iteration})
        # done iterating this learner
        self.callbacks.on_learner_train_end()

    # done with all validation splits
    self.callbacks.on_framework_train_end()
    self.train_time = time.time() - start_time
    def train(self, results_path, model_subpath):
        """Train the model over every (fold, repeat) validation split.

        For each split the data is preprocessed, an optional Optuna
        hyper-parameter search is run, a fresh learner is fitted with
        callback-driven early stopping, explanations are produced, and the
        learner is saved to disk with its in-memory model released.

        Args:
            results_path: Root directory for training artifacts.
            model_subpath: Sub-directory (under ``results_path``) used for
                this model's training logs and saved learners.
        """
        logger.debug(
            f"ModelFramework.train {self.learner_params.get('model_type')}")

        start_time = time.time()
        np.random.seed(self.learner_params["seed"])

        # Create an Optuna tuner only when a time budget is configured and
        # the algorithm supports tuning.
        optuna_tuner = None
        if self._optuna_time_budget is not None and OptunaTuner.is_optimizable(
                self.learner_params.get("model_type", "")):
            optuna_tuner = OptunaTuner(
                results_path,
                ml_task=self._ml_task,
                eval_metric=self.get_metric(),
                time_budget=self._optuna_time_budget,
                init_params=self._optuna_init_params,
                verbose=self._optuna_verbose,
                n_jobs=self.learner_params.get("n_jobs", -1),
                random_state=self._automl_random_state,
            )

        self.validation = ValidationStep(self.validation_params)

        repeats = self.validation.get_repeats()
        for repeat in range(repeats):
            for k_fold in range(self.validation.get_n_splits()):
                train_data, validation_data = self.validation.get_split(
                    k_fold, repeat)
                logger.debug(
                    "Data split, train X:{} y:{}, validation X:{}, y:{}".
                    format(
                        train_data["X"].shape,
                        train_data["y"].shape,
                        validation_data["X"].shape,
                        validation_data["y"].shape,
                    ))
                if "sample_weight" in train_data:
                    logger.debug(
                        "Sample weight available during the training.")

                # the preprocessing is (re)fitted at every validation split
                self.preprocessings += [
                    Preprocessing(self.preprocessing_params, self.get_name(),
                                  k_fold, repeat)
                ]

                X_train, y_train, sample_weight = self.preprocessings[
                    -1].fit_and_transform(train_data["X"], train_data["y"],
                                          train_data.get("sample_weight"))
                (
                    X_validation,
                    y_validation,
                    sample_weight_validation,
                ) = self.preprocessings[-1].transform(
                    validation_data["X"],
                    validation_data["y"],
                    validation_data.get("sample_weight"),
                )

                if optuna_tuner is not None:
                    optuna_start_time = time.time()
                    # Tuning may replace learner_params with the best set found.
                    self.learner_params = optuna_tuner.optimize(
                        self.learner_params.get("model_type", ""),
                        self.params.get("data_type", ""),
                        X_train,
                        y_train,
                        sample_weight,
                        X_validation,
                        y_validation,
                        sample_weight_validation,
                        self.learner_params,
                    )
                    # exclude optuna optimize time from model training time
                    # by shifting start_time forward
                    start_time += time.time() - optuna_start_time

                self.learner_params["explain_level"] = self._explain_level
                # deepcopy so each fold's learner gets its own params dict
                self.learners += [
                    AlgorithmFactory.get_algorithm(
                        copy.deepcopy(self.learner_params))
                ]
                learner = self.learners[-1]
                learner.set_learner_name(k_fold, repeat, repeats)

                self.callbacks.add_and_set_learner(learner)
                self.callbacks.on_learner_train_start()

                log_to_file = os.path.join(results_path, model_subpath,
                                           f"{learner.name}_training.log")

                for i in range(learner.max_iters):

                    self.callbacks.on_iteration_start()

                    learner.fit(
                        X_train,
                        y_train,
                        sample_weight,
                        X_validation,
                        y_validation,
                        sample_weight_validation,
                        log_to_file,
                        self._max_time_for_learner,
                    )

                    if self.params.get("injected_sample_weight", False):
                        # Injected weights are used for fitting only; drop
                        # them so model evaluation below is unweighted.
                        sample_weight = None
                        sample_weight_validation = None

                    self.callbacks.on_iteration_end(
                        {"iter_cnt": i},
                        self.predictions(
                            learner,
                            self.preprocessings[-1],
                            X_train,
                            y_train,
                            sample_weight,
                            X_validation,
                            y_validation,
                            sample_weight_validation,
                        ),
                    )

                    # Callbacks (e.g. early stopping) may request a stop.
                    if learner.stop_training:
                        break
                    learner.update({"step": i})

                # end of learner iters loop
                self.callbacks.on_learner_train_end()

                model_path = os.path.join(results_path, model_subpath)
                learner.interpret(
                    X_train,
                    y_train,
                    X_validation,
                    y_validation,
                    model_file_path=model_path,
                    learner_name=learner.name,
                    class_names=self.preprocessings[-1].get_target_class_names(
                    ),
                    metric_name=self.get_metric_name(),
                    ml_task=self._ml_task,
                    explain_level=self._explain_level,
                )

                # save learner and free the memory held by the fitted model
                p = os.path.join(model_path, learner.get_fname())
                learner.save(p)
                del learner.model
                learner.model = None
                # end of learner training

        # end of validation loop
        self.callbacks.on_framework_train_end()
        self._additional_metrics = self.get_additional_metrics()

        self.train_time = time.time() - start_time
        logger.debug("ModelFramework end of training")
# ---- Example 3 (separator from scraped source; original marker "示例#3 / 0") ----
    def train(self):  # , data):
        """Train one learner per cross-validation fold.

        Every fold gets its own preprocessing and learner; the learner is
        fitted iteratively with callbacks driving early stopping. The total
        wall-clock duration is stored in ``self.train_time``.
        """
        logger.debug(
            f"ModelFramework.train {self.learner_params.get('model_type')}")

        start_time = time.time()
        np.random.seed(self.learner_params["seed"])

        self.validation = ValidationStep(self.validation_params)

        n_splits = self.validation.get_n_splits()
        for k_fold in range(n_splits):
            train_data, validation_data = self.validation.get_split(k_fold)
            logger.debug(
                "Data split, train X:{} y:{}, validation X:{}, y:{}".format(
                    train_data["X"].shape,
                    train_data["y"].shape,
                    validation_data["X"].shape,
                    validation_data["y"].shape,
                ))

            # A fresh preprocessing object is fitted for every fold.
            preprocessing = Preprocessing(self.preprocessing_params)
            self.preprocessings.append(preprocessing)

            X_train, y_train = preprocessing.fit_and_transform(
                train_data["X"], train_data["y"])
            X_validation, y_validation = preprocessing.transform(
                validation_data["X"], validation_data["y"])

            learner = AlgorithmFactory.get_algorithm(self.learner_params)
            self.learners.append(learner)

            self.callbacks.add_and_set_learner(learner)
            self.callbacks.on_learner_train_start()

            for step in range(learner.max_iters):
                self.callbacks.on_iteration_start()

                learner.fit(X_train, y_train)

                self.callbacks.on_iteration_end(
                    {"iter_cnt": step},
                    self.predictions(
                        learner,
                        preprocessing,
                        X_train,
                        y_train,
                        X_validation,
                        y_validation,
                    ),
                )
                # Callbacks (e.g. early stopping) may request a stop.
                if learner.stop_training:
                    break
                learner.update({"step": step})
            # done iterating this learner
            self.callbacks.on_learner_train_end()

        # done with all folds
        self.callbacks.on_framework_train_end()
        self.train_time = time.time() - start_time
        self.get_additional_metrics()
        logger.debug("ModelFramework end of training")
# ---- Example 4 (separator from scraped source; original marker "示例#4 / 0") ----
    def train(self, model_path):
        """Train the model across all cross-validation folds.

        Each fold gets its own preprocessing and learner; the learner is
        fitted with callback-driven early stopping, explained, saved under
        ``model_path``, and its in-memory model is released afterwards.

        Args:
            model_path: Directory where training logs, explanations and the
                serialized learners are written.
        """
        logger.debug(
            f"ModelFramework.train {self.learner_params.get('model_type')}")

        start_time = time.time()
        np.random.seed(self.learner_params["seed"])

        self.validation = ValidationStep(self.validation_params)

        for k_fold in range(self.validation.get_n_splits()):
            train_data, validation_data = self.validation.get_split(k_fold)
            logger.debug(
                "Data split, train X:{} y:{}, validation X:{}, y:{}".format(
                    train_data["X"].shape,
                    train_data["y"].shape,
                    validation_data["X"].shape,
                    validation_data["y"].shape,
                ))

            # the preprocessing is (re)fitted at every validation fold
            self.preprocessings += [Preprocessing(self.preprocessing_params)]

            X_train, y_train = self.preprocessings[-1].fit_and_transform(
                train_data["X"], train_data["y"])
            X_validation, y_validation = self.preprocessings[-1].transform(
                validation_data["X"], validation_data["y"])

            self.learner_params["explain_level"] = self._explain_level
            # deepcopy so each fold's learner gets its own params dict
            self.learners += [
                AlgorithmFactory.get_algorithm(
                    copy.deepcopy(self.learner_params))
            ]
            learner = self.learners[-1]

            self.callbacks.add_and_set_learner(learner)
            self.callbacks.on_learner_train_start()

            log_to_file = os.path.join(model_path,
                                       f"learner_{k_fold+1}_training.log")

            for i in range(learner.max_iters):

                self.callbacks.on_iteration_start()

                learner.fit(X_train, y_train, X_validation, y_validation,
                            log_to_file)

                self.callbacks.on_iteration_end(
                    {"iter_cnt": i},
                    self.predictions(
                        learner,
                        self.preprocessings[-1],
                        X_train,
                        y_train,
                        X_validation,
                        y_validation,
                    ),
                )

                # Callbacks (e.g. early stopping) may request a stop.
                if learner.stop_training:
                    break
                learner.update({"step": i})

            # end of learner iters loop
            self.callbacks.on_learner_train_end()

            learner.interpret(
                X_train,
                y_train,
                X_validation,
                y_validation,
                model_file_path=model_path,
                learner_name=f"learner_{k_fold+1}",
                class_names=self.preprocessings[-1].get_target_class_names(),
                metric_name=self.get_metric_name(),
                ml_task=self._ml_task,
                explain_level=self._explain_level,
            )

            # save learner and free the memory held by the fitted model
            p = os.path.join(model_path,
                             f"learner_{k_fold+1}.{learner.file_extension()}")
            learner.save(p)
            del learner.model
            learner.model = None
            # end of learner training

        # end of validation loop
        self.callbacks.on_framework_train_end()
        self.get_additional_metrics()
        self.train_time = time.time() - start_time
        logger.debug("ModelFramework end of training")