示例#1
0
 def _make_pipeline(self, analysis, feature_transformers, model_create_fn,
                    remote_dir):
     """Build a ``TimeSequencePipeline`` from the best trial in ``analysis``.

     The best trial is the one that maximises ``"reward_metric"``. Its
     config is passed through ``convert_bayes_configs`` and used to create
     a model; the checkpoint is then handed to ``restore_hdfs`` (when
     ``remote_dir`` is given) or ``restore_zip`` (when it is ``None``)
     together with ``feature_transformers`` and the new model.

     :param analysis: object providing ``get_best_config``,
         ``get_best_logdir`` and ``dataframe``, each called with
         ``metric`` and ``mode`` keyword arguments.
     :param feature_transformers: transformer object passed to the restore
         helper and stored in the returned pipeline.
     :param model_create_fn: callable invoked as
         ``model_create_fn(config=...)`` to create the model.
     :param remote_dir: remote directory forwarded to ``restore_hdfs``;
         ``None`` selects ``restore_zip`` instead.
     :return: a ``TimeSequencePipeline`` named ``self.name`` whose config
         is the value returned by the restore helper.
     """
     selection = {"metric": "reward_metric", "mode": "max"}

     best_config = analysis.get_best_config(**selection)
     best_logdir = analysis.get_best_logdir(**selection)
     print("best log dir is ", best_logdir)

     # NOTE(review): the checkpoint name is read from the first dataframe
     # row, which is not explicitly tied to the best trial -- confirm that
     # every trial uses the same checkpoint file name.
     trials_df = analysis.dataframe(**selection)
     checkpoint_name = trials_df["checkpoint"].iloc[0]
     checkpoint_path = os.path.join(best_logdir, checkpoint_name)

     config = convert_bayes_configs(best_config).copy()
     self._print_config(config)
     model = model_create_fn(config=config)

     if remote_dir is None:
         all_config = restore_zip(checkpoint_path, feature_transformers,
                                  model)
     else:
         all_config = restore_hdfs(checkpoint_path, remote_dir,
                                   feature_transformers, model)

     return TimeSequencePipeline(
         name=self.name,
         feature_transformers=feature_transformers,
         model=model,
         config=all_config,
     )
    def _make_pipeline(self, trial, feature_transformers, model):
        """Rebuild a fitted pipeline from the archive saved by ``trial``.

        The zip archive at ``trial.model_path`` is extracted into a
        temporary directory. ``local_config.json`` from the archive is
        loaded and merged on top of a copy of ``trial.config`` (archive
        values win on key clashes). The merged config is then used to
        restore ``model`` from ``weights_tune.h5`` and to restore
        ``feature_transformers``. The temporary directory is always removed.

        :param trial: ``TrialOutput`` providing ``config`` and
            ``model_path``.
        :param feature_transformers: object whose ``restore(**config)`` is
            called; stored in the returned pipeline.
        :param model: object whose ``restore(path, **config)`` is called;
            stored in the returned pipeline.
        :return: a ``TimeSequencePipeline`` built from the restored
            transformers, model and merged config.
        :raises TypeError: if ``trial`` is not a ``TrialOutput``.
        """
        # A bare ``isinstance(...)`` expression discards its result and
        # checks nothing, so the type check is enforced explicitly.
        if not isinstance(trial, TrialOutput):
            raise TypeError(
                "trial must be a TrialOutput, got {}".format(
                    type(trial).__name__))

        # TODO we need to save fitted parameters (not in config, e.g. min
        # max for scalers, model weights) for both transformers and model;
        # for now they are restored from the two files in the archive.
        self._print_config(trial.config)

        dirname = tempfile.mkdtemp(prefix="automl_")
        try:
            with zipfile.ZipFile(trial.model_path) as zf:
                zf.extractall(dirname)

            model_path = os.path.join(dirname, "weights_tune.h5")
            config_path = os.path.join(dirname, "local_config.json")

            # Start from the trial config and let the archived local
            # config override it.
            all_config = trial.config.copy()
            all_config.update(load_config(config_path))

            model.restore(model_path, **all_config)
            feature_transformers.restore(**all_config)
        finally:
            # Remove the extracted files whether or not restoring succeeded.
            shutil.rmtree(dirname)

        return TimeSequencePipeline(feature_transformers=feature_transformers,
                                    model=model,
                                    config=all_config)
示例#3
0
 def _make_pipeline(self, trial, feature_transformers, model, remote_dir):
     """Build a ``TimeSequencePipeline`` from the saved output of ``trial``.

     ``trial.config`` is converted with ``convert_bayes_configs`` and
     printed. ``trial.model_path`` is then handed to ``restore_hdfs`` (when
     ``remote_dir`` is given) or ``restore_zip`` (when it is ``None``)
     together with ``feature_transformers`` and ``model``. The value
     returned by the restore helper becomes the pipeline config.

     :param trial: ``TrialOutput`` providing ``config`` and ``model_path``.
     :param feature_transformers: transformer object passed to the restore
         helper and stored in the returned pipeline.
     :param model: model object passed to the restore helper and stored in
         the returned pipeline.
     :param remote_dir: remote directory forwarded to ``restore_hdfs``;
         ``None`` selects ``restore_zip`` instead.
     :return: a ``TimeSequencePipeline`` named ``self.name``.
     :raises TypeError: if ``trial`` is not a ``TrialOutput``.
     """
     # A bare ``isinstance(...)`` expression discards its result and checks
     # nothing, so the type check is enforced explicitly.
     if not isinstance(trial, TrialOutput):
         raise TypeError(
             "trial must be a TrialOutput, got {}".format(
                 type(trial).__name__))

     # The converted config is only printed here; it is not passed to the
     # restore helpers.
     config = convert_bayes_configs(trial.config).copy()
     self._print_config(config)

     if remote_dir is not None:
         all_config = restore_hdfs(trial.model_path,
                                   remote_dir,
                                   feature_transformers,
                                   model)
     else:
         all_config = restore_zip(trial.model_path,
                                  feature_transformers,
                                  model)

     return TimeSequencePipeline(name=self.name,
                                 feature_transformers=feature_transformers,
                                 model=model,
                                 config=all_config)
示例#4
0
 def test_get_default_configs(self):
     """Check that ``get_default_configs`` runs on a freshly built pipeline.

     The returned value is not inspected; the test only fails if
     constructing the pipeline or calling the method raises.
     """
     TimeSequencePipeline(name='test').get_default_configs()
    # print(test_df.describe())

    tsp = TimeSequencePredictor(
        dt_col="datetime",
        target_col="value",
        extra_features_col=None,
    )
    pipeline = tsp.fit(train_df,
                       validation_df=val_df,
                       metric="mean_squared_error")

    print(
        "evaluate:",
        pipeline.evaluate(test_df, metric=["mean_squared_error", "r_square"]))
    pred = pipeline.predict(test_df)
    print("predict:", pred.shape)

    save_pipeline_file = "../../../saved_pipeline/"
    pipeline.save(save_pipeline_file)

    new_pipeline = TimeSequencePipeline()
    new_pipeline.restore(save_pipeline_file)
    print(
        "evaluate:",
        new_pipeline.evaluate(test_df,
                              metric=["mean_squared_error", "r_square"]))

    new_pred = new_pipeline.predict(test_df)
    print("predict:", pred.shape)
    np.testing.assert_allclose(pred["value"].values, new_pred["value"].values)