示例#1
0
def _persist_to_file(data,
                     expert_type="best_model",
                     partition_label=None,
                     partition_hash=None,
                     path=""):
    """Persist ``data`` to a JSON or pickle file inside ``path``.

    The target file is named ``<partition_hash>.<expert_type>``. Missing
    parent directories are created. Exactly one of `partition_label` and
    `partition_hash` has to be supplied.

    Parameters
    ----------
    data : object
        Object to store. It has to be JSON-serializable when `expert_type`
        is one of the json options listed below, and picklable otherwise.

    expert_type : string
        Used as the file extension and to select the storage format.
        options for json format
        ['partition','model_reprs','logical_partition','physical_partition',
        'frequency','horizon','best_model_hash','best_model_name','best_model_repr']
        anything else is pickled (e.g. 'best_model', 'cv_data', 'cv_results')

    partition_label : dict
        Model's metadata used to generate hash for the file name
        (passed to `generate_partition_hash`).
        e.g. {'country':'Canada', 'city':'Vancouver', 'series':'Number of citizens'}

    partition_hash : string
        Partition's hash generated with `generate_partition_hash` function

    path : str
        Path to the directory, where files are to be stored,
        by default '' resulting in current working directory behaviour

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If neither or both of `partition_label` and `partition_hash` are set.
    """
    json_expert_types = (
        "partition",
        "model_reprs",
        "logical_partition",
        "physical_partition",
        "frequency",
        "horizon",
        "best_model_hash",
        "best_model_name",
        "best_model_repr",
    )

    if partition_hash is None:
        if partition_label is None:
            raise ValueError(
                "Either one of `partition_label` or `partition_hash` must be set."
            )
        partition_hash = generate_partition_hash(partition_label)
    elif partition_label is not None:
        raise ValueError(
            "Only one of `partition_label` or `partition_hash` must be set. You set both"
        )

    file_path = Path(path, f"{partition_hash}.{expert_type}")

    # `exist_ok=True` avoids the check-then-create race when several writers
    # persist into the same, not yet existing, directory at the same time.
    file_path.parent.mkdir(parents=True, exist_ok=True)

    if expert_type in json_expert_types:
        # Explicit encoding keeps the written file independent of the
        # platform's default locale encoding.
        with open(file_path, "w", encoding="utf-8") as json_file:
            json.dump(data, json_file)
    else:
        with open(file_path, "wb") as pickle_file:
            pickle.dump(data, pickle_file)
示例#2
0
    def __init__(
        self,
        best_model,
        cv_results,
        cv_data,
        model_reprs,
        partition,
        X_train,
        y_train,
        frequency,
        horizon,
        country_code_column,
    ):
        """Store the given results and derive the best-model summaries.

        Every argument is kept unchanged as an attribute of the same name.
        On top of that the following attributes are derived:
        ``best_model_hash``, ``best_model_cv_data``, ``best_model_name``,
        ``best_model_cv_results``, ``best_model_repr`` and ``partition_hash``.

        Parameters
        ----------
        best_model : object
            Estimator handed to `generate_estimator_hash` and
            `get_estimator_name`.
        cv_results : object
            Table with a "rank_test_score" column; the first row with rank 1
            is kept as ``best_model_cv_results``
            (presumably a pandas DataFrame - confirm).
        cv_data : object
            Table with "split" and "y_true" columns plus one column named
            after the best model's hash
            (presumably a pandas DataFrame - confirm).
        model_reprs : mapping
            Looked up by the best model's hash to obtain ``best_model_repr``.
        partition : object
            Handed to `generate_partition_hash`.
        X_train, y_train, frequency, horizon, country_code_column : object
            Stored as-is; not otherwise used by the constructor.
        """
        # Plain copies of the constructor arguments.
        self.best_model = best_model
        self.cv_results = cv_results
        self.cv_data = cv_data
        self.model_reprs = model_reprs
        self.partition = partition
        self.X_train = X_train
        self.y_train = y_train
        self.frequency = frequency
        self.horizon = horizon
        self.country_code_column = country_code_column

        # Derived attributes describing the winning model.
        model_hash = generate_estimator_hash(best_model)
        self.best_model_hash = model_hash

        # Expose the winner's column under the fixed name "best_model".
        relabelled_cv = self.cv_data.rename({model_hash: "best_model"}, axis=1)
        self.best_model_cv_data = relabelled_cv[["split", "y_true", "best_model"]]

        estimator_name = get_estimator_name(best_model)
        self.best_model_name = estimator_name.replace("model__", "")

        top_ranked = self.cv_results["rank_test_score"] == 1
        self.best_model_cv_results = self.cv_results[top_ranked].iloc[0]

        self.best_model_repr = self.model_reprs[model_hash]
        self.partition_hash = generate_partition_hash(self.partition)

        # Snapshot of the attribute names set so far; `_df_plot` is assigned
        # afterwards and is therefore not part of the list.
        self._persist_attrs = sorted(set(vars(self)) - {"self"})
        self._df_plot = None