예제 #1
0
    def __init__(self,
                 horizon=1,
                 dt_col="datetime",
                 target_col="value",
                 logs_dir="~/zoo_automl_logs",
                 extra_features_col=None,
                 search_alg=None,
                 search_alg_params=None,
                 scheduler=None,
                 scheduler_params=None,
                 name="automl"
                 ):
        """
        Set up the trainer by building the wrapped TimeSequencePredictor.

        Every argument is handed to ``TimeSequencePredictor``; the resulting
        predictor is stored on ``self.internal``.

        :param horizon: steps to look forward; passed to the predictor as
            ``future_seq_len``
        :param dt_col: the datetime column
        :param target_col: the target column to forecast. A single string is
            wrapped into a one-element list; any other value is passed
            through unchanged.
        :param logs_dir: passed through as ``logs_dir`` (presumably the
            directory for search logs -- confirm against the predictor)
        :param extra_features_col: extra feature columns
        :param search_alg: passed through unchanged
        :param search_alg_params: passed through unchanged
        :param scheduler: passed through unchanged
        :param scheduler_params: passed through unchanged
        :param name: passed through unchanged
        """
        # Only a bare string is normalised; lists and other values are kept.
        targets = [target_col] if isinstance(target_col, str) else target_col
        predictor_kwargs = dict(
            dt_col=dt_col,
            target_col=targets,
            logs_dir=logs_dir,
            future_seq_len=horizon,
            extra_features_col=extra_features_col,
            search_alg=search_alg,
            search_alg_params=search_alg_params,
            scheduler=scheduler,
            scheduler_params=scheduler_params,
            name=name,
        )
        self.internal = TimeSequencePredictor(**predictor_kwargs)
 def test_fit_BayesRecipe(self):
     """
     Fit with a BayesRecipe and the "BayesOpt" search algorithm.

     Checks the types of the returned pipeline and its parts, that the
     config contains "epochs", that no config key ends with 'float', and
     that the chosen past_seq_len lies inside the look_back range (3, 5).
     """
     from zoo.zouwu.config.recipe import BayesRecipe

     train_df, _, future_seq_len = self.create_dataset()
     bayes_opt_params = {
         "utility_kwargs": {"kind": "ucb", "kappa": 2.5, "xi": 0.0},
     }
     predictor = TimeSequencePredictor(
         dt_col="datetime",
         target_col="value",
         future_seq_len=future_seq_len,
         extra_features_col=None,
         search_alg="BayesOpt",
         search_alg_params=bayes_opt_params,
     )
     recipe = BayesRecipe(num_samples=1,
                          training_iteration=2,
                          epochs=1,
                          look_back=(3, 5))
     pipeline = predictor.fit(train_df, recipe=recipe)

     # Structural checks on the fitted pipeline.
     assert isinstance(pipeline, TimeSequencePipeline)
     assert isinstance(pipeline.feature_transformers,
                       TimeSequenceFeatureTransformer)
     assert isinstance(pipeline.model, BaseModel)

     # Checks on the configuration selected by the search.
     config = pipeline.config
     assert config is not None
     assert "epochs" in config
     assert not any(key.endswith('float') for key in config)
     assert 'past_seq_len' in config
     past_seq_len = config["past_seq_len"]
     assert 3 <= past_seq_len <= 5
 def test_fit_SmokeRecipe(self):
     """
     Fit with train and validation frames and no explicit recipe.

     No recipe is passed, so fit() uses its own default (presumably
     SmokeRecipe, going by the test name -- confirm against the predictor).
     Checks the types of the returned pipeline and its parts and that a
     config is present.
     """
     train_df, validation_df, future_seq_len = self.create_dataset()
     predictor = TimeSequencePredictor(
         dt_col="datetime",
         target_col="value",
         future_seq_len=future_seq_len,
         extra_features_col=None,
     )
     pipeline = predictor.fit(train_df, validation_df)

     assert isinstance(pipeline, TimeSequencePipeline)
     assert isinstance(pipeline.feature_transformers,
                       TimeSequenceFeatureTransformer)
     assert isinstance(pipeline.model, BaseModel)
     assert pipeline.config is not None
예제 #4
0
 def get_input_tsp(self, future_seq_len, target_col):
     """
     Build random train/test frames and a predictor configured for them.

     :param future_seq_len: passed to TimeSequencePredictor
     :param target_col: a single column name (str) or an iterable of column
         names; each target column is filled with np.random.randn noise
     :return: tuple (train_df, test_df, tsp, test_sample_num)
     """
     # Both sizes are drawn before any column data, train size first.
     sample_num = np.random.randint(100, 200)
     test_sample_num = np.random.randint(20, 30)
     single_target = isinstance(target_col, str)

     def random_frame(n_rows):
         # Train and test frames start on the same date; only length differs.
         stamps = pd.date_range('1/1/2019', periods=n_rows)
         if single_target:
             # Single target: "datetime" comes first, then the value column.
             return pd.DataFrame({
                 "datetime": stamps,
                 target_col: np.random.randn(n_rows),
             })
         # Several targets: value columns first, "datetime" appended last.
         frame = pd.DataFrame({col: np.random.randn(n_rows)
                               for col in target_col})
         frame["datetime"] = stamps
         return frame

     train_df = random_frame(sample_num)
     test_df = random_frame(test_sample_num)

     tsp = TimeSequencePredictor(dt_col="datetime",
                                 target_col=target_col,
                                 future_seq_len=future_seq_len,
                                 extra_features_col=None)
     return train_df, test_df, tsp, test_sample_num
 def test_fit_LSTMGridRandomRecipe(self):
     """
     Fit with an LSTMGridRandomRecipe using a fixed look_back of 2.

     Checks the types of the returned pipeline and its parts and that the
     resulting config reports past_seq_len == 2.
     """
     from zoo.zouwu.config.recipe import LSTMGridRandomRecipe

     train_df, _, future_seq_len = self.create_dataset()
     predictor = TimeSequencePredictor(
         dt_col="datetime",
         target_col="value",
         future_seq_len=future_seq_len,
         extra_features_col=None,
     )
     recipe = LSTMGridRandomRecipe(lstm_2_units=[4],
                                   batch_size=[1024],
                                   num_rand_samples=5,
                                   look_back=2,
                                   training_iteration=1,
                                   epochs=1)
     pipeline = predictor.fit(train_df, recipe=recipe)

     assert isinstance(pipeline, TimeSequencePipeline)
     assert isinstance(pipeline.feature_transformers,
                       TimeSequenceFeatureTransformer)
     assert isinstance(pipeline.model, BaseModel)

     config = pipeline.config
     assert config is not None
     assert 'past_seq_len' in config
     assert config["past_seq_len"] == 2
예제 #6
0
class AutoTSTrainer:
    """
    The Automated Time Series Forecast Trainer.

    A thin wrapper: the constructor builds a ``TimeSequencePredictor`` (kept
    on ``self.internal``) and ``fit`` runs it and wraps the fitted pipeline
    in a ``TSPipeline``.
    """

    def __init__(self,
                 horizon=1,
                 dt_col="datetime",
                 target_col="value",
                 logs_dir="~/zoo_automl_logs",
                 extra_features_col=None,
                 search_alg=None,
                 search_alg_params=None,
                 scheduler=None,
                 scheduler_params=None,
                 name="automl"
                 ):
        """
        Initialize the AutoTS Trainer.

        :param horizon: steps to look forward; passed to the predictor as
            ``future_seq_len``
        :param dt_col: the datetime column
        :param target_col: the target column to forecast. A single string is
            wrapped into a one-element list; any other value is passed
            through unchanged.
        :param logs_dir: passed through as ``logs_dir`` (presumably the
            directory for search logs -- confirm against the predictor)
        :param extra_features_col: extra feature columns
        :param search_alg: passed through to the predictor unchanged
        :param search_alg_params: passed through to the predictor unchanged
        :param scheduler: passed through to the predictor unchanged
        :param scheduler_params: passed through to the predictor unchanged
        :param name: passed through to the predictor unchanged
        """
        target_col_list = target_col
        if isinstance(target_col, str):
            target_col_list = [target_col]
        self.internal = TimeSequencePredictor(
            dt_col=dt_col,
            target_col=target_col_list,
            logs_dir=logs_dir,
            future_seq_len=horizon,
            extra_features_col=extra_features_col,
            search_alg=search_alg,
            search_alg_params=search_alg_params,
            scheduler=scheduler,
            scheduler_params=scheduler_params,
            name=name
        )

    def fit(self,
            train_df,
            validation_df=None,
            metric="mse",
            recipe: Recipe = None,
            uncertainty: bool = False,
            upload_dir=None,
            ):
        """
        Fit a time series forecasting pipeline w/ automl
        :param train_df: the input dataframe (as pandas.dataframe)
        :param validation_df: the validation dataframe (as pandas.dataframe)
        :param recipe: the configuration of searching. When omitted (or
            None) a new ``SmokeRecipe()`` is created for this call.
        :param metric: the evaluation metric to optimize
        :param uncertainty: whether to enable uncertainty calculation
                            (will output an uncertainty sigma)
        :param upload_dir: Optional URI to sync training results and checkpoints. We only support
            hdfs URI for now.
        :return a TSPipeline
        """
        if recipe is None:
            # Build the default per call. A call expression in the signature
            # is evaluated once at definition time, which would construct the
            # recipe at import and share that one instance across all calls.
            recipe = SmokeRecipe()
        zoo_pipeline = self.internal.fit(train_df,
                                         validation_df,
                                         metric,
                                         recipe,
                                         mc=uncertainty,  # predictor's name for this flag
                                         upload_dir=upload_dir)
        # TSPipeline takes no constructor arguments here; the fitted pipeline
        # is attached afterwards through its ``internal`` attribute.
        ppl = TSPipeline()
        ppl.internal = zoo_pipeline
        return ppl