def _persist_to_file(data, expert_type="best_model", partition_label=None, partition_hash=None, path=""):
    """Persist ``data`` to ``<path>/<partition_hash>.<expert_type>``.

    The payload is written as JSON when ``expert_type`` is one of the
    metadata kinds listed below, and pickled otherwise. The target
    directory is created (including parents) when it does not exist yet.

    Parameters
    ----------
    data : object
        Payload to store. Must be JSON-serializable for the JSON expert
        types, and picklable for everything else.
    expert_type : string
        Used as the file extension and to select the serialization format.
        Options for json format
        ['partition','model_reprs','logical_partition','physical_partition',
        'frequency','horizon','best_model_hash','best_model_name','best_model_repr'];
        anything else is pickled (e.g. 'best_model', 'cv_data', 'cv_results').
    partition_label : dict
        Model's metadata used to generate hash for the file name.
        e.g. {'country':'Canada', 'city':'Vancouver', 'series':'Number of citizens'}
    partition_hash : string
        Partition's hash generated with `generate_partition_hash` function.
    path : str
        Path to the directory, where files are to be stored, by default ''
        resulting in current working directory behaviour.

    Raises
    ------
    ValueError
        If neither or both of ``partition_label`` and ``partition_hash``
        are provided.
    """
    json_expert_types = {
        "partition",
        "model_reprs",
        "logical_partition",
        "physical_partition",
        "frequency",
        "horizon",
        "best_model_hash",
        "best_model_name",
        "best_model_repr",
    }

    # Exactly one of `partition_label` / `partition_hash` has to be supplied.
    if partition_hash is None:
        if partition_label is None:
            raise ValueError(
                "Either one of `partition_label` or `partition_hash` must be set."
            )
        partition_hash = generate_partition_hash(partition_label)
    elif partition_label is not None:
        raise ValueError(
            "Only one of `partition_label` or `partition_hash` must be set. You set both"
        )

    file_path = Path(path, f"{partition_hash}.{expert_type}")
    # `exist_ok=True` avoids the check-then-create race when several writers
    # persist into the same directory concurrently.
    file_path.parent.mkdir(parents=True, exist_ok=True)

    if expert_type in json_expert_types:
        with open(file_path, "w", encoding="utf-8") as json_file:
            json.dump(data, json_file)
    else:
        with open(file_path, "wb") as pickle_file:
            pickle.dump(data, pickle_file)
def __init__(
    self,
    best_model,
    cv_results,
    cv_data,
    model_reprs,
    partition,
    X_train,
    y_train,
    frequency,
    horizon,
    country_code_column,
):
    """Store the supplied results and derive best-model conveniences.

    Every argument is kept on the instance under its own name. On top of
    that the constructor derives:

    - ``best_model_hash`` via ``generate_estimator_hash(best_model)``
    - ``best_model_cv_data``: ``cv_data`` with the best model's column
      renamed to ``"best_model"``, restricted to the ``split``,
      ``y_true`` and ``best_model`` columns
    - ``best_model_name``: ``get_estimator_name(best_model)`` with
      ``"model__"`` removed
    - ``best_model_cv_results``: first row of ``cv_results`` whose
      ``rank_test_score`` equals 1
    - ``best_model_repr``: ``model_reprs`` entry for the best model hash
    - ``partition_hash`` via ``generate_partition_hash(partition)``

    NOTE(review): ``cv_results`` / ``cv_data`` are presumably pandas
    DataFrames (``rename(..., axis=1)``, ``.iloc``) - confirm with callers.
    """
    # Constructor arguments, stored verbatim.
    self.best_model = best_model
    self.cv_results = cv_results
    self.cv_data = cv_data
    self.model_reprs = model_reprs
    self.partition = partition
    self.X_train = X_train
    self.y_train = y_train
    self.frequency = frequency
    self.horizon = horizon
    self.country_code_column = country_code_column

    # Derived best-model attributes.
    model_hash = generate_estimator_hash(best_model)
    self.best_model_hash = model_hash

    cv_data_renamed = self.cv_data.rename({model_hash: "best_model"}, axis=1)
    self.best_model_cv_data = cv_data_renamed[["split", "y_true", "best_model"]]

    estimator_name = get_estimator_name(best_model)
    self.best_model_name = estimator_name.replace("model__", "")

    is_top_ranked = self.cv_results["rank_test_score"] == 1
    self.best_model_cv_results = self.cv_results[is_top_ranked].iloc[0]

    self.best_model_repr = self.model_reprs[model_hash]
    self.partition_hash = generate_partition_hash(self.partition)

    # Snapshot of the attribute names defined up to this point; anything
    # assigned afterwards (e.g. `_df_plot`) is intentionally not included.
    self._persist_attrs = sorted(
        attr_name for attr_name in vars(self) if attr_name != "self"
    )
    self._df_plot = None