def train(self, data):
    start_time = time.time()
    log.debug("IterativeLearner.train")
    np.random.seed(self.learner_params["seed"])

    data = PreprocessingExcludeMissingValues.remove_rows_without_target(data)
    self.validation = ValidationStep(self.validation_params, data)

    for train_data, validation_data in self.validation.split():
        # the preprocessing is done at every validation step
        self.preprocessings += [PreprocessingStep(self.preprocessing_params)]
        train_data, _ = self.preprocessings[-1].run(train_data)
        validation_data = self.preprocessings[-1].transform(validation_data)

        self.learners += [LearnerFactory.get_learner(self.learner_params)]
        learner = self.learners[-1]

        self.callbacks.add_and_set_learner(learner)
        self.callbacks.on_learner_train_start()

        for i in range(learner.max_iters):
            self.callbacks.on_iteration_start()
            learner.fit(train_data.get("X"), train_data.get("y"))
            # do a target postprocessing here
            self.callbacks.on_iteration_end(
                {"iter_cnt": i},
                self.predictions(learner, train_data, validation_data),
            )
            if learner.stop_training:
                break
            learner.update({"step": i})
        # end of learner iters loop
        self.callbacks.on_learner_train_end()
    # end of validation loop
    self.callbacks.on_framework_train_end()
    self.train_time = time.time() - start_time
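# The loop above only drives the callback lifecycle; the decision to stop early is
# made inside the callbacks themselves, by setting learner.stop_training. Below is a
# minimal, illustrative sketch of a time-budget callback compatible with that
# lifecycle; the class name and constructor are made up here and are not the
# library's actual implementation.
import time


class TimeBudgetCallback:
    """Illustrative callback: stop the current learner once a time budget is spent."""

    def __init__(self, max_seconds=60):
        self.max_seconds = max_seconds
        self.learner = None
        self._started_at = None

    def add_and_set_learner(self, learner):
        # called once per validation split, before training of that learner starts
        self.learner = learner

    def on_learner_train_start(self, logs=None):
        self._started_at = time.time()

    def on_iteration_start(self, logs=None):
        pass

    def on_iteration_end(self, logs=None, predictions=None):
        # setting stop_training makes the training loop break after this iteration
        if time.time() - self._started_at > self.max_seconds:
            self.learner.stop_training = True

    def on_learner_train_end(self, logs=None):
        pass

    def on_framework_train_end(self, logs=None):
        pass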
def train(self, results_path, model_subpath):
    logger.debug(f"ModelFramework.train {self.learner_params.get('model_type')}")

    start_time = time.time()
    np.random.seed(self.learner_params["seed"])

    # set up Optuna tuning only for algorithms that support it
    optuna_tuner = None
    if self._optuna_time_budget is not None and OptunaTuner.is_optimizable(
        self.learner_params.get("model_type", "")
    ):
        optuna_tuner = OptunaTuner(
            results_path,
            ml_task=self._ml_task,
            eval_metric=self.get_metric(),
            time_budget=self._optuna_time_budget,
            init_params=self._optuna_init_params,
            verbose=self._optuna_verbose,
            n_jobs=self.learner_params.get("n_jobs", -1),
            random_state=self._automl_random_state,
        )

    self.validation = ValidationStep(self.validation_params)

    repeats = self.validation.get_repeats()
    for repeat in range(repeats):
        for k_fold in range(self.validation.get_n_splits()):
            train_data, validation_data = self.validation.get_split(k_fold, repeat)
            logger.debug(
                "Data split, train X:{} y:{}, validation X:{}, y:{}".format(
                    train_data["X"].shape,
                    train_data["y"].shape,
                    validation_data["X"].shape,
                    validation_data["y"].shape,
                )
            )
            if "sample_weight" in train_data:
                logger.debug("Sample weight available during the training.")

            # the preprocessing is done at every validation step
            self.preprocessings += [
                Preprocessing(self.preprocessing_params, self.get_name(), k_fold, repeat)
            ]

            X_train, y_train, sample_weight = self.preprocessings[-1].fit_and_transform(
                train_data["X"], train_data["y"], train_data.get("sample_weight")
            )
            (
                X_validation,
                y_validation,
                sample_weight_validation,
            ) = self.preprocessings[-1].transform(
                validation_data["X"],
                validation_data["y"],
                validation_data.get("sample_weight"),
            )

            if optuna_tuner is not None:
                optuna_start_time = time.time()
                self.learner_params = optuna_tuner.optimize(
                    self.learner_params.get("model_type", ""),
                    self.params.get("data_type", ""),
                    X_train,
                    y_train,
                    sample_weight,
                    X_validation,
                    y_validation,
                    sample_weight_validation,
                    self.learner_params,
                )
                # exclude optuna optimize time from model training
                start_time += time.time() - optuna_start_time

            self.learner_params["explain_level"] = self._explain_level
            self.learners += [
                AlgorithmFactory.get_algorithm(copy.deepcopy(self.learner_params))
            ]
            learner = self.learners[-1]
            learner.set_learner_name(k_fold, repeat, repeats)

            self.callbacks.add_and_set_learner(learner)
            self.callbacks.on_learner_train_start()

            log_to_file = os.path.join(
                results_path, model_subpath, f"{learner.name}_training.log"
            )

            for i in range(learner.max_iters):
                self.callbacks.on_iteration_start()

                learner.fit(
                    X_train,
                    y_train,
                    sample_weight,
                    X_validation,
                    y_validation,
                    sample_weight_validation,
                    log_to_file,
                    self._max_time_for_learner,
                )

                if self.params.get("injected_sample_weight", False):
                    # do not use sample weight in model evaluation
                    sample_weight = None
                    sample_weight_validation = None

                self.callbacks.on_iteration_end(
                    {"iter_cnt": i},
                    self.predictions(
                        learner,
                        self.preprocessings[-1],
                        X_train,
                        y_train,
                        sample_weight,
                        X_validation,
                        y_validation,
                        sample_weight_validation,
                    ),
                )
                if learner.stop_training:
                    break
                learner.update({"step": i})

            # end of learner iters loop
            self.callbacks.on_learner_train_end()

            # produce model explanations for this fold
            model_path = os.path.join(results_path, model_subpath)
            learner.interpret(
                X_train,
                y_train,
                X_validation,
                y_validation,
                model_file_path=model_path,
                learner_name=learner.name,
                class_names=self.preprocessings[-1].get_target_class_names(),
                metric_name=self.get_metric_name(),
                ml_task=self._ml_task,
                explain_level=self._explain_level,
            )

            # save learner and free the memory
            p = os.path.join(model_path, learner.get_fname())
            learner.save(p)
            del learner.model
            learner.model = None
            # end of learner training
    # end of validation loop
    self.callbacks.on_framework_train_end()
    self._additional_metrics = self.get_additional_metrics()
    self.train_time = time.time() - start_time
    logger.debug("ModelFramework end of training")
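# One detail worth noting in the variant above: the Optuna search time is excluded
# from the reported train_time not by timing the two phases separately, but by
# shifting start_time forward. A minimal sketch of the same trick in isolation;
# the function names here are made up for illustration.
import time


def timed_fit(tune, fit):
    """Return elapsed fit time only; tuning time is subtracted by moving the start."""
    start_time = time.time()

    tune_start = time.time()
    params = tune()                          # potentially long hyperparameter search
    start_time += time.time() - tune_start   # cancel out the tuning duration

    fit(params)
    return time.time() - start_time          # fit time only, tuning excluded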
def train(self):
    logger.debug(f"ModelFramework.train {self.learner_params.get('model_type')}")

    start_time = time.time()
    np.random.seed(self.learner_params["seed"])

    self.validation = ValidationStep(self.validation_params)

    for k_fold in range(self.validation.get_n_splits()):
        train_data, validation_data = self.validation.get_split(k_fold)
        logger.debug(
            "Data split, train X:{} y:{}, validation X:{}, y:{}".format(
                train_data["X"].shape,
                train_data["y"].shape,
                validation_data["X"].shape,
                validation_data["y"].shape,
            )
        )

        # the preprocessing is done at every validation step
        self.preprocessings += [Preprocessing(self.preprocessing_params)]
        X_train, y_train = self.preprocessings[-1].fit_and_transform(
            train_data["X"], train_data["y"]
        )
        X_validation, y_validation = self.preprocessings[-1].transform(
            validation_data["X"], validation_data["y"]
        )

        self.learners += [AlgorithmFactory.get_algorithm(self.learner_params)]
        learner = self.learners[-1]

        self.callbacks.add_and_set_learner(learner)
        self.callbacks.on_learner_train_start()

        for i in range(learner.max_iters):
            self.callbacks.on_iteration_start()
            learner.fit(X_train, y_train)
            self.callbacks.on_iteration_end(
                {"iter_cnt": i},
                self.predictions(
                    learner,
                    self.preprocessings[-1],
                    X_train,
                    y_train,
                    X_validation,
                    y_validation,
                ),
            )
            if learner.stop_training:
                break
            learner.update({"step": i})
        # end of learner iters loop
        self.callbacks.on_learner_train_end()
    # end of validation loop
    self.callbacks.on_framework_train_end()
    self.train_time = time.time() - start_time
    self.get_additional_metrics()
    logger.debug("ModelFramework end of training")
def train(self, model_path):
    logger.debug(f"ModelFramework.train {self.learner_params.get('model_type')}")

    start_time = time.time()
    np.random.seed(self.learner_params["seed"])

    self.validation = ValidationStep(self.validation_params)

    for k_fold in range(self.validation.get_n_splits()):
        train_data, validation_data = self.validation.get_split(k_fold)
        logger.debug(
            "Data split, train X:{} y:{}, validation X:{}, y:{}".format(
                train_data["X"].shape,
                train_data["y"].shape,
                validation_data["X"].shape,
                validation_data["y"].shape,
            )
        )

        # the preprocessing is done at every validation step
        self.preprocessings += [Preprocessing(self.preprocessing_params)]
        X_train, y_train = self.preprocessings[-1].fit_and_transform(
            train_data["X"], train_data["y"]
        )
        X_validation, y_validation = self.preprocessings[-1].transform(
            validation_data["X"], validation_data["y"]
        )

        self.learner_params["explain_level"] = self._explain_level
        self.learners += [
            AlgorithmFactory.get_algorithm(copy.deepcopy(self.learner_params))
        ]
        learner = self.learners[-1]

        self.callbacks.add_and_set_learner(learner)
        self.callbacks.on_learner_train_start()

        log_to_file = os.path.join(model_path, f"learner_{k_fold+1}_training.log")

        for i in range(learner.max_iters):
            self.callbacks.on_iteration_start()
            learner.fit(X_train, y_train, X_validation, y_validation, log_to_file)
            self.callbacks.on_iteration_end(
                {"iter_cnt": i},
                self.predictions(
                    learner,
                    self.preprocessings[-1],
                    X_train,
                    y_train,
                    X_validation,
                    y_validation,
                ),
            )
            if learner.stop_training:
                break
            learner.update({"step": i})
        # end of learner iters loop
        self.callbacks.on_learner_train_end()

        # produce model explanations for this fold
        learner.interpret(
            X_train,
            y_train,
            X_validation,
            y_validation,
            model_file_path=model_path,
            learner_name=f"learner_{k_fold+1}",
            class_names=self.preprocessings[-1].get_target_class_names(),
            metric_name=self.get_metric_name(),
            ml_task=self._ml_task,
            explain_level=self._explain_level,
        )

        # save learner and free the memory
        p = os.path.join(model_path, f"learner_{k_fold+1}.{learner.file_extension()}")
        learner.save(p)
        del learner.model
        learner.model = None
        # end of learner training
    # end of validation loop
    self.callbacks.on_framework_train_end()
    self.get_additional_metrics()
    self.train_time = time.time() - start_time
    logger.debug("ModelFramework end of training")
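# All of the variants above share the same per-fold skeleton: get the split, fit the
# preprocessing on the training fold only, transform the validation fold with it, and
# then run an iterative fit loop with early stopping. A condensed, library-agnostic
# sketch of that skeleton follows; it uses scikit-learn purely for illustration, and
# none of these names come from the code above.
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import log_loss
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler


def cross_validated_train(X, y, n_splits=5, max_iters=10, seed=1234):
    """Illustrative per-fold loop: preprocessing is refit on every training fold
    so nothing from the validation fold leaks into it. X and y are numpy arrays."""
    np.random.seed(seed)
    fold_scores = []
    splitter = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
    for train_idx, valid_idx in splitter.split(X):
        scaler = StandardScaler().fit(X[train_idx])       # fit on the training fold only
        X_train, X_valid = scaler.transform(X[train_idx]), scaler.transform(X[valid_idx])
        y_train, y_valid = y[train_idx], y[valid_idx]

        model = SGDClassifier(loss="log_loss", random_state=seed)
        best = np.inf
        for _ in range(max_iters):                        # iterative fit, early stopping
            model.partial_fit(X_train, y_train, classes=np.unique(y))
            score = log_loss(y_valid, model.predict_proba(X_valid), labels=np.unique(y))
            if score >= best:                             # no improvement, stop this fold
                break
            best = score
        fold_scores.append(best)
    return fold_scores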