def __init__(self,
             horizon=1,
             dt_col="datetime",
             target_col="value",
             logs_dir="~/zoo_automl_logs",
             extra_features_col=None,
             search_alg=None,
             search_alg_params=None,
             scheduler=None,
             scheduler_params=None,
             name="automl"
             ):
    """
    Initialize the AutoTS Trainer by wrapping a TimeSequencePredictor.

    :param horizon: steps to look forward; handed to the predictor as
        ``future_seq_len``
    :param dt_col: the datetime column
    :param target_col: the target column to forecast. A single string is
        wrapped into a one-element list; any other value is passed through
        unchanged.
    :param logs_dir: forwarded unchanged to the predictor
    :param extra_features_col: extra feature columns
    :param search_alg: forwarded unchanged to the predictor
    :param search_alg_params: forwarded unchanged to the predictor
    :param scheduler: forwarded unchanged to the predictor
    :param scheduler_params: forwarded unchanged to the predictor
    :param name: forwarded unchanged to the predictor
    """
    # The predictor always receives a list-like target specification.
    targets = [target_col] if isinstance(target_col, str) else target_col

    predictor_args = dict(
        dt_col=dt_col,
        target_col=targets,
        logs_dir=logs_dir,
        future_seq_len=horizon,
        extra_features_col=extra_features_col,
        search_alg=search_alg,
        search_alg_params=search_alg_params,
        scheduler=scheduler,
        scheduler_params=scheduler_params,
        name=name,
    )
    self.internal = TimeSequencePredictor(**predictor_args)
def test_fit_BayesRecipe(self):
    """
    Fit a predictor configured with the "BayesOpt" search algorithm using a
    BayesRecipe, then check the type and configuration of the returned
    pipeline.
    """
    from zoo.zouwu.config.recipe import BayesRecipe

    train_df, _, future_seq_len = self.create_dataset()

    utility_kwargs = {
        "kind": "ucb",
        "kappa": 2.5,
        "xi": 0.0,
    }
    tsp = TimeSequencePredictor(
        dt_col="datetime",
        target_col="value",
        future_seq_len=future_seq_len,
        extra_features_col=None,
        search_alg="BayesOpt",
        search_alg_params={"utility_kwargs": utility_kwargs},
    )

    bayes_recipe = BayesRecipe(
        num_samples=1,
        training_iteration=2,
        epochs=1,
        look_back=(3, 5),
    )
    pipeline = tsp.fit(train_df, recipe=bayes_recipe)

    # Structure of the fitted pipeline.
    assert isinstance(pipeline, TimeSequencePipeline)
    assert isinstance(pipeline.feature_transformers,
                      TimeSequenceFeatureTransformer)
    assert isinstance(pipeline.model, BaseModel)

    # Contents of the selected configuration.
    assert pipeline.config is not None
    assert "epochs" in pipeline.config
    # No key ending in 'float' may remain in the final config.
    float_suffixed = [key for key in pipeline.config
                      if key.endswith('float')]
    assert float_suffixed == []
    assert 'past_seq_len' in pipeline.config
    # The chosen look-back must lie within the (3, 5) range given above.
    assert 3 <= pipeline.config["past_seq_len"] <= 5
def test_fit_SmokeRecipe(self):
    """
    Fit a predictor on train and validation data without passing a recipe,
    then check the type of each part of the returned pipeline.

    NOTE(review): no recipe is given, so the predictor's own default is
    used - presumably SmokeRecipe, going by the test name; confirm against
    TimeSequencePredictor.fit.
    """
    train_df, validation_df, future_seq_len = self.create_dataset()

    predictor_args = dict(
        dt_col="datetime",
        target_col="value",
        future_seq_len=future_seq_len,
        extra_features_col=None,
    )
    tsp = TimeSequencePredictor(**predictor_args)

    pipeline = tsp.fit(train_df, validation_df)

    assert isinstance(pipeline, TimeSequencePipeline)
    assert isinstance(pipeline.feature_transformers,
                      TimeSequenceFeatureTransformer)
    assert isinstance(pipeline.model, BaseModel)
    assert pipeline.config is not None
def get_input_tsp(self, future_seq_len, target_col):
    """
    Build random train/test dataframes and a matching TimeSequencePredictor.

    :param future_seq_len: passed to the predictor as ``future_seq_len``
    :param target_col: either a single column name (str) or an iterable of
        column names; each named column is filled with ``np.random.randn``
        values
    :return: tuple ``(train_df, test_df, tsp, test_sample_num)``
    """
    # Row counts are drawn first, train before test, as two separate draws.
    n_train = np.random.randint(100, 200)
    n_test = np.random.randint(20, 30)
    single_target = isinstance(target_col, str)

    def build_frame(n_rows):
        # Both frames use a daily "datetime" column starting at 1/1/2019.
        if single_target:
            return pd.DataFrame({
                "datetime": pd.date_range('1/1/2019', periods=n_rows),
                target_col: np.random.randn(n_rows),
            })
        # Multi-target: value columns come first, "datetime" is appended.
        frame = pd.DataFrame(
            {col: np.random.randn(n_rows) for col in target_col})
        frame["datetime"] = pd.date_range('1/1/2019', periods=n_rows)
        return frame

    # Train frame is generated before the test frame.
    train_df = build_frame(n_train)
    test_df = build_frame(n_test)

    tsp = TimeSequencePredictor(
        dt_col="datetime",
        target_col=target_col,
        future_seq_len=future_seq_len,
        extra_features_col=None,
    )
    return train_df, test_df, tsp, n_test
def test_fit_LSTMGridRandomRecipe(self):
    """
    Fit a predictor with an LSTMGridRandomRecipe that fixes look_back to 2,
    then check the returned pipeline and that its config carries that
    look-back as ``past_seq_len``.
    """
    from zoo.zouwu.config.recipe import LSTMGridRandomRecipe

    train_df, _, future_seq_len = self.create_dataset()

    tsp = TimeSequencePredictor(
        dt_col="datetime",
        target_col="value",
        future_seq_len=future_seq_len,
        extra_features_col=None,
    )

    lstm_recipe = LSTMGridRandomRecipe(
        lstm_2_units=[4],
        batch_size=[1024],
        num_rand_samples=5,
        look_back=2,
        training_iteration=1,
        epochs=1,
    )
    pipeline = tsp.fit(train_df, recipe=lstm_recipe)

    assert isinstance(pipeline, TimeSequencePipeline)
    assert isinstance(pipeline.feature_transformers,
                      TimeSequenceFeatureTransformer)
    assert isinstance(pipeline.model, BaseModel)
    assert pipeline.config is not None
    assert 'past_seq_len' in pipeline.config
    # look_back=2 in the recipe must surface as past_seq_len == 2.
    assert pipeline.config["past_seq_len"] == 2
class AutoTSTrainer:
    """
    The Automated Time Series Forecast Trainer
    """

    def __init__(self,
                 horizon=1,
                 dt_col="datetime",
                 target_col="value",
                 logs_dir="~/zoo_automl_logs",
                 extra_features_col=None,
                 search_alg=None,
                 search_alg_params=None,
                 scheduler=None,
                 scheduler_params=None,
                 name="automl"
                 ):
        """
        Initialize the AutoTS Trainer.

        :param horizon: steps to look forward; passed to the internal
            predictor as ``future_seq_len``
        :param dt_col: the datetime column
        :param target_col: the target column to forecast. A single string
            is wrapped into a one-element list; any other value is passed
            through unchanged.
        :param logs_dir: forwarded unchanged to the internal predictor
        :param extra_features_col: extra feature columns
        :param search_alg: forwarded unchanged to the internal predictor
        :param search_alg_params: forwarded unchanged to the internal
            predictor
        :param scheduler: forwarded unchanged to the internal predictor
        :param scheduler_params: forwarded unchanged to the internal
            predictor
        :param name: forwarded unchanged to the internal predictor
        """
        target_col_list = target_col
        if isinstance(target_col, str):
            target_col_list = [target_col]
        self.internal = TimeSequencePredictor(
            dt_col=dt_col,
            target_col=target_col_list,
            logs_dir=logs_dir,
            future_seq_len=horizon,
            extra_features_col=extra_features_col,
            search_alg=search_alg,
            search_alg_params=search_alg_params,
            scheduler=scheduler,
            scheduler_params=scheduler_params,
            name=name
        )

    def fit(self,
            train_df,
            validation_df=None,
            metric="mse",
            recipe: Recipe = None,
            uncertainty: bool = False,
            upload_dir=None,
            ):
        """
        Fit a time series forecasting pipeline w/ automl

        :param train_df: the input dataframe (as pandas.dataframe)
        :param validation_df: the validation dataframe (as pandas.dataframe)
        :param metric: the evaluation metric to optimize
        :param recipe: the configuration of searching. When omitted (or
            None), a new SmokeRecipe is created for this call.
        :param uncertainty: whether to enable uncertainty calculation
            (will output an uncertainty sigma); passed to the internal
            predictor as ``mc``
        :param upload_dir: Optional URI to sync training results and
            checkpoints. We only support hdfs URI for now.
        :return: a TSPipeline wrapping the pipeline produced by the
            internal predictor
        """
        # Build the default recipe per call. A `SmokeRecipe()` in the
        # signature would be evaluated once at definition time and the same
        # instance would then be shared by every fit() call.
        if recipe is None:
            recipe = SmokeRecipe()

        zoo_pipeline = self.internal.fit(train_df,
                                         validation_df,
                                         metric,
                                         recipe,
                                         mc=uncertainty,
                                         upload_dir=upload_dir)
        ppl = TSPipeline()
        ppl.internal = zoo_pipeline
        return ppl