def save(self, path: str = None, verbose=True) -> str:
    """
    Saves the model to disk.

    Parameters
    ----------
    path : str, default None
        Path to the saved model, minus the file name.
        This should generally be a directory path ending with a '/' character
        (or appropriate path separator value depending on OS).
        If None, self.path is used.
        The final model file is typically saved to path + self.model_file_name.
    verbose : bool, default True
        Whether to log the location of the saved file.

    Returns
    -------
    path : str
        Path to the saved model, minus the file name.
        Use this value to load the model from disk via cls.load(path),
        cls being the class of the model object, such as model = RFModel.load(path)
    """
    if path is None:
        path = self.path
    file_path = path + self.model_file_name
    save_pkl.save(path=file_path, object=self, verbose=verbose)
    return path

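# A minimal save/load round trip for the method above. `RFModel` is the
# subclass named in the docstring; the constructor arguments shown here are
# illustrative assumptions, not the exact AbstractModel signature.
model = RFModel(path='models/rf/', name='RandomForest')
saved_path = model.save()          # returns the directory, not the full file path
model = RFModel.load(saved_path)   # cls.load(path) restores the pickled model
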
def save_info(self) -> dict:
    info = self.get_info()
    save_pkl.save(path=self.path + self.model_info_name, object=info)
    json_path = self.path + self.model_info_json_name
    save_json.save(path=json_path, obj=info)
    return info

def save_artifacts(predictor, leaderboard, config):
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        models_dir = output_subdir("models", config)
        shutil.rmtree(os.path.join(models_dir, "utils"), ignore_errors=True)
        if 'leaderboard' in artifacts:
            save_pd.save(path=os.path.join(models_dir, "leaderboard.csv"), df=leaderboard)
        if 'info' in artifacts:
            ag_info = predictor.info()
            info_dir = output_subdir("info", config)
            save_pkl.save(path=os.path.join(info_dir, "info.pkl"), object=ag_info)
        if 'models' in artifacts:
            utils.zip_path(models_dir, os.path.join(models_dir, "models.zip"))

            def delete(path, isdir):
                if isdir:
                    shutil.rmtree(path, ignore_errors=True)
                elif os.path.splitext(path)[1] == '.pkl':
                    os.remove(path)

            utils.walk_apply(models_dir, delete, max_depth=0)
    except Exception:
        log.warning("Error when saving artifacts.", exc_info=True)

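# The artifact set written above is selected through the benchmark config. A
# hedged sketch of the relevant framework_params entry ('_save_artifacts' is
# the key read by save_artifacts, and ['leaderboard'] is its default):
framework_params = {'_save_artifacts': ['leaderboard', 'info', 'models']}
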
def save(self, path=None, verbose=True, save_oof=True, save_children=False) -> str:
    if path is None:
        path = self.path

    if save_children:
        # Save each child model to its own subdirectory, then keep only the
        # child names so the children are not pickled with the ensemble.
        model_names = []
        for child in self.models:
            child = self.load_child(child)
            child.set_contexts(path + child.name + os.path.sep)
            child.save(verbose=False)
            model_names.append(child.name)
        self.models = model_names

    if save_oof and self._oof_pred_proba is not None:
        # Persist out-of-fold predictions separately and drop them from the
        # object to keep the main model pickle small.
        save_pkl.save(path=path + 'utils' + os.path.sep + self._oof_filename, object={
            '_oof_pred_proba': self._oof_pred_proba,
            '_oof_pred_model_repeats': self._oof_pred_model_repeats,
        })
        self._oof_pred_proba = None
        self._oof_pred_model_repeats = None

    return super().save(path=path, verbose=verbose)

def save(self):
    trainer = None
    if self.trainer is not None:
        if not self.is_trainer_present:
            # Save the trainer separately and detach it so it is not pickled
            # inside the learner object.
            self.trainer.save()
            trainer = self.trainer
            self.trainer = None
    save_pkl.save(path=self.save_path, object=self)
    if trainer is not None:
        self.trainer = trainer  # Reattach only if the trainer was detached above

def save(self) -> None:
    """Save this predictor to file in directory specified by this Predictor's ``path``.

    Note that :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit` already saves the predictor
    object automatically (we do not recommend modifying the Predictor object yourself as it
    tracks many trained models).
    """
    tmp_learner = self._learner
    self._learner = None
    save_pkl.save(path=tmp_learner.path + self.predictor_file_name, object=self)
    self._learner = tmp_learner

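# Usage sketch for the save above, assuming an already-fitted
# TimeSeriesPredictor whose `path` attribute points at the save directory;
# TimeSeriesPredictor.load is its standard counterpart.
predictor.save()                                      # pickles the predictor minus its learner reference
predictor = TimeSeriesPredictor.load(predictor.path)  # restores it from the same directory
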
def save(self) -> None:
    models = self.models
    self.models = {}
    save_pkl.save(path=self.path_pkl, object=self)
    # Save each model separately; iterate the saved reference, since
    # self.models was just emptied to keep the trainer pickle small.
    for model in models.values():
        model.save()
    self.models = models

def save(self):
    tmp_learner = self._learner
    tmp_trainer = self._trainer
    super().save()
    self._learner = None
    self._trainer = None
    save_pkl.save(path=tmp_learner.path + self.predictor_file_name, object=self)
    self._learner = tmp_learner
    self._trainer = tmp_trainer

def _callback(env):
    if not eval_result:
        _init(env)
    for data_name, eval_name, result, _ in env.evaluation_result_list:
        eval_result[data_name][eval_name].append(result)
    if (interval > 0) and ((env.iteration - offset) % interval == 0) and (env.iteration != 0):
        # min_error = min(eval_result['valid_set']['multi_error'])
        # print('iter:', env.iteration, 'min_error:', min_error)
        save_pkl.save(path=path, object=eval_result)

def save(self, file_prefix=""): """ Additional naming changes will be appended to end of file_prefix (must contain full absolute path) """ dataobj_file = file_prefix + self.DATAOBJ_SUFFIX datalist_file = file_prefix + self.DATAVALUES_SUFFIX data_list = self.dataset._data self.dataset = None # Avoid pickling these self.dataloader = None save_pkl.save(path=dataobj_file, object=self) mx.nd.save(datalist_file, data_list) logger.debug("TabularNN Dataset saved to files: \n %s \n %s" % (dataobj_file, datalist_file))
def save(self) -> None:
    # todo: remove / revise low_memory logic
    models = self.models
    if self.low_memory:
        self.models = {}
    try:
        save_pkl.save(path=self.path_pkl, object=self)
    except:  # noqa
        # Fallback: if pickling with models attached fails, drop them and retry.
        self.models = {}
        save_pkl.save(path=self.path_pkl, object=self)
    if not self.models:
        self.models = models

def hyperparameter_tune(self, X_train, y_train, X_val, y_val, scheduler_options, **kwargs):
    # verbosity = kwargs.get('verbosity', 2)
    time_start = time.time()
    logger.log(15, "Starting generic AbstractModel hyperparameter tuning for %s model..." % self.name)
    self._set_default_searchspace()
    params_copy = self.params.copy()
    directory = self.path  # also create model directory if it doesn't exist
    # TODO: This will break on S3. Use tabular/utils/savers for datasets, add new function
    scheduler_func, scheduler_options = scheduler_options  # Unpack tuple
    if scheduler_func is None or scheduler_options is None:
        raise ValueError("scheduler_func and scheduler_options cannot be None for hyperparameter tuning")
    params_copy['num_threads'] = scheduler_options['resource'].get('num_cpus', None)
    params_copy['num_gpus'] = scheduler_options['resource'].get('num_gpus', None)
    dataset_train_filename = 'dataset_train.p'
    train_path = directory + dataset_train_filename
    save_pkl.save(path=train_path, object=(X_train, y_train))
    dataset_val_filename = 'dataset_val.p'
    val_path = directory + dataset_val_filename
    save_pkl.save(path=val_path, object=(X_val, y_val))
    if not any(isinstance(params_copy[hyperparam], Space) for hyperparam in params_copy):
        logger.warning("Attempting to do hyperparameter optimization without any search space (all hyperparameters are already fixed values)")
    else:
        logger.log(15, "Hyperparameter search space for %s model: " % self.name)
        for hyperparam in params_copy:
            if isinstance(params_copy[hyperparam], Space):
                logger.log(15, f"{hyperparam}: {params_copy[hyperparam]}")
    util_args = dict(
        dataset_train_filename=dataset_train_filename,
        dataset_val_filename=dataset_val_filename,
        directory=directory,
        model=self,
        time_start=time_start,
        time_limit=scheduler_options['time_out'],
    )
    model_trial.register_args(util_args=util_args, **params_copy)
    scheduler: FIFOScheduler = scheduler_func(model_trial, **scheduler_options)
    if ('dist_ip_addrs' in scheduler_options) and (len(scheduler_options['dist_ip_addrs']) > 0):
        # This is multi-machine setting, so need to copy dataset to workers:
        logger.log(15, "Uploading data to remote workers...")
        scheduler.upload_files([train_path, val_path])  # TODO: currently does not work.
        directory = self.path  # TODO: need to change to path to working directory used on every remote machine
        model_trial.update(directory=directory)
        logger.log(15, "uploaded")
    scheduler.run()
    scheduler.join_jobs()
    return self._get_hpo_results(scheduler=scheduler, scheduler_options=scheduler_options, time_start=time_start)

def save(self, path: str = None, **kwargs) -> str:
    if path is None:
        path = self.path
    path = Path(path)
    path.mkdir(exist_ok=True)

    predictor = self.gts_predictor
    self.gts_predictor = None

    with disable_root_logger():
        if predictor:
            Path.mkdir(path / self.gluonts_model_path, exist_ok=True)
            predictor.serialize(path / self.gluonts_model_path)

    save_pkl.save(path=str(path / self.model_file_name), object=self)
    self.gts_predictor = predictor
    return str(path)

def save_artifacts(predictor, leaderboard, config):
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        if 'leaderboard' in artifacts:
            leaderboard_dir = output_subdir("leaderboard", config)
            save_pd.save(path=os.path.join(leaderboard_dir, "leaderboard.csv"), df=leaderboard)
        if 'info' in artifacts:
            ag_info = predictor.info()
            info_dir = output_subdir("info", config)
            save_pkl.save(path=os.path.join(info_dir, "info.pkl"), object=ag_info)
        if 'models' in artifacts:
            shutil.rmtree(os.path.join(predictor.path, "utils"), ignore_errors=True)
            models_dir = output_subdir("models", config)
            utils.zip_path(predictor.path, os.path.join(models_dir, "models.zip"))
    except Exception:
        log.warning("Error when saving artifacts.", exc_info=True)

def save_info(self, include_model_info=False):
    info = self.get_info(include_model_info=include_model_info)
    save_pkl.save(path=self.path + self.learner_info_name, object=info)
    save_json.save(path=self.path + self.learner_info_json_name, obj=info)
    return info

def save_model_base(self, model_base):
    save_pkl.save(path=self.path + 'utils' + os.path.sep + 'model_template.pkl', object=model_base)

def save_val_data(self, data: TimeSeriesDataFrame, verbose: bool = True) -> None:
    path = self.path_data + "val.pkl"
    save_pkl.save(path=path, object=data, verbose=verbose)

def hyperparameter_tune(self, X_train, y_train, X_val, y_val, scheduler_options, **kwargs):
    time_start = time.time()
    logger.log(15, "Beginning hyperparameter tuning for Gradient Boosting Model...")
    self._set_default_searchspace()
    params_copy = self.params.copy()
    if isinstance(params_copy['min_data_in_leaf'], Int):
        upper_minleaf = params_copy['min_data_in_leaf'].upper
        if upper_minleaf > X_train.shape[0]:  # TODO: this min_data_in_leaf adjustment based on sample size may not be necessary
            upper_minleaf = max(1, int(X_train.shape[0] / 5.0))
            lower_minleaf = params_copy['min_data_in_leaf'].lower
            if lower_minleaf > upper_minleaf:
                lower_minleaf = max(1, int(upper_minleaf / 3.0))
            params_copy['min_data_in_leaf'] = Int(lower=lower_minleaf, upper=upper_minleaf)

    directory = self.path  # also create model directory if it doesn't exist
    # TODO: This will break on S3! Use tabular/utils/savers for datasets, add new function
    os.makedirs(directory, exist_ok=True)
    scheduler_func, scheduler_options = scheduler_options  # Unpack tuple
    if scheduler_func is None or scheduler_options is None:
        raise ValueError("scheduler_func and scheduler_options cannot be None for hyperparameter tuning")
    num_threads = scheduler_options['resource'].get('num_cpus', -1)
    params_copy['num_threads'] = num_threads
    # num_gpus = scheduler_options['resource']['num_gpus']  # TODO: unused

    dataset_train, dataset_val = self.generate_datasets(X_train=X_train, y_train=y_train, params=params_copy, X_val=X_val, y_val=y_val)
    dataset_train_filename = "dataset_train.bin"
    train_file = self.path + dataset_train_filename
    if os.path.exists(train_file):  # clean up old files first
        os.remove(train_file)
    dataset_train.save_binary(train_file)
    dataset_val_filename = "dataset_val.bin"  # names without directory info
    val_file = self.path + dataset_val_filename
    if os.path.exists(val_file):  # clean up old files first
        os.remove(val_file)
    dataset_val.save_binary(val_file)
    dataset_val_pkl_filename = 'dataset_val.pkl'
    val_pkl_path = directory + dataset_val_pkl_filename
    save_pkl.save(path=val_pkl_path, object=(X_val, y_val))

    if not np.any([isinstance(params_copy[hyperparam], Space) for hyperparam in params_copy]):
        logger.warning("Attempting to do hyperparameter optimization without any search space (all hyperparameters are already fixed values)")
    else:
        logger.log(15, "Hyperparameter search space for Gradient Boosting Model: ")
        for hyperparam in params_copy:
            if isinstance(params_copy[hyperparam], Space):
                logger.log(15, f'{hyperparam}: {params_copy[hyperparam]}')

    util_args = dict(
        dataset_train_filename=dataset_train_filename,
        dataset_val_filename=dataset_val_filename,
        dataset_val_pkl_filename=dataset_val_pkl_filename,
        directory=directory,
        model=self,
        time_start=time_start,
        time_limit=scheduler_options['time_out']
    )
    lgb_trial.register_args(util_args=util_args, **params_copy)
    scheduler = scheduler_func(lgb_trial, **scheduler_options)
    if ('dist_ip_addrs' in scheduler_options) and (len(scheduler_options['dist_ip_addrs']) > 0):
        # This is multi-machine setting, so need to copy dataset to workers:
        logger.log(15, "Uploading data to remote workers...")
        scheduler.upload_files([train_file, val_file, val_pkl_path])  # TODO: currently does not work.
        directory = self.path  # TODO: need to change to path to working directory used on every remote machine
        lgb_trial.update(directory=directory)
        logger.log(15, "uploaded")
    scheduler.run()
    scheduler.join_jobs()
    return self._get_hpo_results(scheduler=scheduler, scheduler_options=scheduler_options, time_start=time_start)

def save(self, path: str):
    save_pkl.save(path=path, object=self)

def _callback(env):
    if ((env.iteration - offset) % interval == 0) and (env.iteration != 0):
        save_pkl.save(path=path, object=env.model)
        save_pointer.save(path=latest_model_checkpoint, content_path=path)

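# Hedged usage sketch for the two checkpointing callbacks above: each is a
# closure over `path`, `interval`, `offset`, etc., produced by some factory
# (called `make_checkpoint_callback` here purely for illustration) and passed
# to LightGBM's real `callbacks` argument.
import lightgbm as lgb

train_set = lgb.Dataset(X_train, label=y_train)  # X_train / y_train assumed in scope
booster = lgb.train(
    {'objective': 'binary'},
    train_set,
    num_boost_round=500,
    callbacks=[make_checkpoint_callback(path='checkpoints/model.pkl', interval=100, offset=0)],
)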