class TestZouwuModelMTNetForecaster(TestCase):

    def setUp(self):
        tf.keras.backend.clear_session()
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()

    def tearDown(self):
        pass

    def create_data(self):
        def gen_train_sample(data, past_seq_len, future_seq_len):
            data = pd.DataFrame(data)
            x, y = self.ft._roll_train(data,
                                       past_seq_len=past_seq_len,
                                       future_seq_len=future_seq_len)
            return x, y

        def gen_test_sample(data, past_seq_len):
            test_data = pd.DataFrame(data)
            x = self.ft._roll_test(test_data, past_seq_len=past_seq_len)
            return x

        self.long_num = 6
        self.time_step = 2
        look_back = (self.long_num + 1) * self.time_step
        look_forward = 1
        self.x_train, self.y_train = gen_train_sample(data=np.random.randn(64, 4),
                                                      past_seq_len=look_back,
                                                      future_seq_len=look_forward)
        self.x_val, self.y_val = gen_train_sample(data=np.random.randn(16, 4),
                                                  past_seq_len=look_back,
                                                  future_seq_len=look_forward)
        self.x_test = gen_test_sample(data=np.random.randn(16, 4),
                                      past_seq_len=look_back)

    def test_forecast_mtnet(self):
        # TODO hacking to fix a bug
        target_dim = 1
        model = MTNetForecaster(target_dim=target_dim,
                                feature_dim=self.x_train.shape[-1],
                                long_series_num=self.long_num,
                                series_length=self.time_step)
        x_train_long, x_train_short = model.preprocess_input(self.x_train)
        x_val_long, x_val_short = model.preprocess_input(self.x_val)
        x_test_long, x_test_short = model.preprocess_input(self.x_test)
        model.fit([x_train_long, x_train_short],
                  self.y_train,
                  validation_data=([x_val_long, x_val_short], self.y_val),
                  batch_size=32,
                  distributed=False)
        assert model.evaluate([x_val_long, x_val_short], self.y_val)
        predict_result = model.predict([x_test_long, x_test_short])
        assert predict_result.shape == (self.x_test.shape[0], target_dim)
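# A minimal, self-contained sketch (not part of the test suite) of how the MTNet
# look-back window above decomposes by shape: the (long_num + 1) * time_step input
# steps split into `long_num` long-term series plus one short-term series of
# `time_step` steps each. This mirrors only the shapes expected from
# model.preprocess_input(); the actual implementation may differ.
import numpy as np

n_samples, long_num, time_step, feature_dim = 16, 6, 2, 4
look_back = (long_num + 1) * time_step  # 14 steps per sample
x = np.random.randn(n_samples, look_back, feature_dim)
x_long = x[:, :long_num * time_step].reshape(n_samples, long_num, time_step, feature_dim)
x_short = x[:, long_num * time_step:]
assert x_long.shape == (n_samples, long_num, time_step, feature_dim)
assert x_short.shape == (n_samples, time_step, feature_dim)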
def test_dataframe_input_with_datetime(self):
    train_df, validation_df, future_seq_len = get_ts_input()
    ft = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len,
                                        dt_col="datetime",
                                        target_col="value")
    input_dim = len(ft.get_feature_list()) + 1
    searcher = prepare_searcher(data=train_df,
                                validation_data=validation_df,
                                model_creator=LSTM_model_creator,
                                name='test_ray_dateframe_with_datetime_with_val',
                                recipe=create_lstm_recipe(input_dim),
                                feature_transformer=ft)
    searcher.run()
    best_trials = searcher.get_best_trials(k=1)
    assert best_trials is not None
def test_evaluate_predict_future_more_1(self):
    target_col = "values"
    metrics = ["mse", "r2"]
    future_seq_len = np.random.randint(2, 6)
    train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len,
                                                                 target_col)
    pipeline = tsp.fit(train_df, test_df)
    mse, rs = pipeline.evaluate(test_df, metrics=metrics)
    assert len(mse) == future_seq_len
    assert len(rs) == future_seq_len
    y_pred = pipeline.predict(test_df)
    # predict() returns one datetime column plus future_seq_len target columns
    assert y_pred.shape == (test_sample_num - default_past_seq_len + 1,
                            future_seq_len + 1)
    y_pred_df = pipeline.predict(test_df[:-future_seq_len])
    columns = ["{}_{}".format(target_col, i) for i in range(future_seq_len)]
    y_pred_value = y_pred_df[columns].values
    y_df = test_df[default_past_seq_len:]
    y_value = TimeSequenceFeatureTransformer()._roll_test(y_df[target_col],
                                                          future_seq_len)
    mse_pred_eval, rs_pred_eval = [Evaluator.evaluate(m, y_value, y_pred_value)
                                   for m in metrics]
    mse_eval, rs_eval = pipeline.evaluate(test_df, metrics)
    assert_array_almost_equal(mse_pred_eval, mse_eval, decimal=2)
    assert_array_almost_equal(rs_pred_eval, rs_eval, decimal=2)
def load_ts_pipeline(file):
    feature_transformers = TimeSequenceFeatureTransformer()
    model = TimeSequenceModel(check_optional_config=False)
    all_config = restore_zip(file, feature_transformers, model)
    ts_pipeline = TimeSequencePipeline(feature_transformers=feature_transformers,
                                       model=model,
                                       config=all_config)
    print("Restore pipeline from", file)
    return ts_pipeline
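# A hedged round-trip sketch for the loader above (the path is illustrative and
# `fitted_pipeline` is assumed to be a TimeSequencePipeline that has already
# been fit):
#
#     ppl_file = fitted_pipeline.save("demo_pipeline.ppl")
#     restored_pipeline = load_ts_pipeline(ppl_file)
#     y_pred = restored_pipeline.predict(new_input_df)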
def test_dataframe_input_with_datetime(self):
    train_df, validation_df, future_seq_len = get_ts_input()
    dataframe_with_datetime = {'df': train_df, 'val_df': validation_df}
    ft = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len,
                                        dt_col="datetime",
                                        target_col="value")
    searcher = prepare_searcher(data=dataframe_with_datetime,
                                model_creator=LSTM_model_creator,
                                name='test_ray_dateframe_with_datetime_with_val',
                                feature_transformer=ft)
    searcher.run()
    best_trials = searcher.get_best_trials(k=1)
    assert best_trials is not None
class TestSeq2Seq(ZooTestCase):

    def setup_method(self, method):
        # super().setup_method(method)
        self.train_data = pd.DataFrame(data=np.random.randn(64, 4))
        self.val_data = pd.DataFrame(data=np.random.randn(16, 4))
        self.test_data = pd.DataFrame(data=np.random.randn(16, 4))
        self.past_seq_len = 6
        self.future_seq_len_1 = 1
        self.future_seq_len_2 = 2
        # use roll method in time_sequence
        self.feat = TimeSequenceFeatureTransformer()
        self.config = {'batch_size': 32, 'epochs': 1}
        self.model_1 = LSTMSeq2Seq(check_optional_config=False,
                                   future_seq_len=self.future_seq_len_1)
        self.model_2 = LSTMSeq2Seq(check_optional_config=False,
                                   future_seq_len=self.future_seq_len_2)
        self.fitted = False
        self.predict_1 = None
        self.predict_2 = None

    def teardown_method(self, method):
        pass

    def test_fit_eval_1(self):
        x_train_1, y_train_1 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_1)
        print("fit_eval_future_seq_len_1:",
              self.model_1.fit_eval(x_train_1, y_train_1, **self.config))
        assert self.model_1.past_seq_len == 6
        assert self.model_1.feature_num == 4
        assert self.model_1.future_seq_len == 1
        assert self.model_1.target_col_num == 1

    def test_fit_eval(self):
        past_seq_len = 6
        future_seq_len = 2
        input_dim = 5
        output_dim = 4
        x_train = np.random.rand(100, past_seq_len, input_dim)
        y_train = np.random.rand(100, future_seq_len, output_dim)
        x_test = np.random.rand(100, past_seq_len, input_dim)
        y_test = np.random.rand(100, future_seq_len, output_dim)
        model = LSTMSeq2Seq(check_optional_config=False,
                            future_seq_len=future_seq_len)
        model_config = {
            'batch_size': 32,
            'epochs': 1,
            'latent_dim': 128,
            'dropout': 0.2
        }
        model.fit_eval(x_train, y_train, **model_config)
        y_pred = model.predict(x_test)
        rmse, smape = model.evaluate(x=x_test, y=y_test, metric=["rmse", "smape"])
        assert rmse.shape == smape.shape
        assert rmse.shape == (future_seq_len, output_dim)
        assert model.past_seq_len == past_seq_len
        assert model.future_seq_len == future_seq_len
        assert model.feature_num == input_dim
        assert model.target_col_num == output_dim
        assert y_pred.shape == y_test.shape

    def test_fit_eval_2(self):
        x_train_2, y_train_2 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_2)
        print("fit_eval_future_seq_len_2:",
              self.model_2.fit_eval(x_train_2, y_train_2, **self.config))
        assert self.model_2.future_seq_len == 2
        self.fitted = True

    def test_evaluate_1(self):
        x_train_1, y_train_1 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_1)
        x_val_1, y_val_1 = self.feat._roll_train(
            self.val_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_1)
        self.model_1.fit_eval(x_train_1, y_train_1, **self.config)
        print("evaluate_future_seq_len_1:",
              self.model_1.evaluate(x_val_1, y_val_1, metric=['mse', 'r2']))

    def test_evaluate_2(self):
        x_train_2, y_train_2 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_2)
        x_val_2, y_val_2 = self.feat._roll_train(
            self.val_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_2)
        self.model_2.fit_eval(x_train_2, y_train_2, **self.config)
        print("evaluate_future_seq_len_2:",
              self.model_2.evaluate(x_val_2, y_val_2, metric=['mse', 'r2']))

    def test_predict_1(self):
        x_train_1, y_train_1 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_1)
        x_test_1 = self.feat._roll_test(self.test_data,
                                        past_seq_len=self.past_seq_len)
        self.model_1.fit_eval(x_train_1, y_train_1, **self.config)
        predict_1 = self.model_1.predict(x_test_1)
        assert predict_1.shape == (x_test_1.shape[0], self.future_seq_len_1)

    def test_predict_2(self):
        x_train_2, y_train_2 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_2)
        x_test_2 = self.feat._roll_test(self.test_data,
                                        past_seq_len=self.past_seq_len)
        self.model_2.fit_eval(x_train_2, y_train_2, **self.config)
        predict_2 = self.model_2.predict(x_test_2)
        assert predict_2.shape == (x_test_2.shape[0], self.future_seq_len_2)

    def test_save_restore_1(self):
        x_train_1, y_train_1 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_1)
        x_test_1 = self.feat._roll_test(self.test_data,
                                        past_seq_len=self.past_seq_len)
        self.model_1.fit_eval(x_train_1, y_train_1, **self.config)
        predict_1_before = self.model_1.predict(x_test_1)
        new_model_1 = LSTMSeq2Seq(check_optional_config=False)
        dirname = tempfile.mkdtemp(prefix="automl_test_feature")
        try:
            save(dirname, model=self.model_1)
            restore(dirname, model=new_model_1, config=self.config)
            predict_1_after = new_model_1.predict(x_test_1)
            # pass the message via err_msg; a bare trailing string after the
            # call would be dead code
            assert_array_almost_equal(
                predict_1_before, predict_1_after, decimal=2,
                err_msg="Prediction values are not the same after restore: "
                        "predict before is {}, and predict after is {}".format(
                            predict_1_before, predict_1_after))
            new_config = {'epochs': 1}
            new_model_1.fit_eval(x_train_1, y_train_1, **new_config)
        finally:
            shutil.rmtree(dirname)

    def test_save_restore_2(self):
        x_train_2, y_train_2 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_2)
        x_test_2 = self.feat._roll_test(self.test_data,
                                        past_seq_len=self.past_seq_len)
        self.model_2.fit_eval(x_train_2, y_train_2, **self.config)
        predict_2_before = self.model_2.predict(x_test_2)
        new_model_2 = LSTMSeq2Seq(check_optional_config=False)
        dirname = tempfile.mkdtemp(prefix="automl_test_feature")
        try:
            save(dirname, model=self.model_2)
            restore(dirname, model=new_model_2, config=self.config)
            predict_2_after = new_model_2.predict(x_test_2)
            assert_array_almost_equal(
                predict_2_before, predict_2_after, decimal=2,
                err_msg="Prediction values are not the same after restore: "
                        "predict before is {}, and predict after is {}".format(
                            predict_2_before, predict_2_after))
            new_config = {'epochs': 2}
            new_model_2.fit_eval(x_train_2, y_train_2, **new_config)
        finally:
            shutil.rmtree(dirname)

    def test_predict_with_uncertainty(self):
        x_train_2, y_train_2 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_2)
        x_test_2 = self.feat._roll_test(self.test_data,
                                        past_seq_len=self.past_seq_len)
        self.model_2.fit_eval(x_train_2, y_train_2, mc=True, **self.config)
        prediction, uncertainty = self.model_2.predict_with_uncertainty(x_test_2,
                                                                        n_iter=2)
        assert prediction.shape == (x_test_2.shape[0], self.future_seq_len_2)
        assert uncertainty.shape == (x_test_2.shape[0], self.future_seq_len_2)
        assert np.any(uncertainty)
        new_model_2 = LSTMSeq2Seq(check_optional_config=False)
        dirname = tempfile.mkdtemp(prefix="automl_test_feature")
        try:
            save(dirname, model=self.model_2)
            restore(dirname, model=new_model_2, config=self.config)
            prediction, uncertainty = new_model_2.predict_with_uncertainty(x_test_2,
                                                                           n_iter=2)
            assert prediction.shape == (x_test_2.shape[0], self.future_seq_len_2)
            assert uncertainty.shape == (x_test_2.shape[0], self.future_seq_len_2)
            assert np.any(uncertainty)
        finally:
            shutil.rmtree(dirname)
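# A minimal standalone sketch (NumPy only; illustrative of the Monte Carlo
# dropout idea behind predict_with_uncertainty above, not the library's
# implementation): run a stochastic forward pass n_iter times (dropout kept
# active via mc=True at fit time), then report the per-output mean and
# standard deviation.
import numpy as np

def mc_predict(stochastic_predict_fn, x, n_iter=10):
    # stochastic_predict_fn is a hypothetical callable whose outputs vary
    # between calls, e.g. a Keras model predicting with dropout enabled
    samples = np.stack([stochastic_predict_fn(x) for _ in range(n_iter)])
    return samples.mean(axis=0), samples.std(axis=0)

# usage on a toy stochastic function:
pred, uncertainty = mc_predict(lambda x: x + np.random.randn(*x.shape) * 0.1,
                               np.zeros((4, 2)), n_iter=8)
assert pred.shape == uncertainty.shape == (4, 2)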
class TimeSequencePipeline(Pipeline):

    def __init__(self, feature_transformers=None, model=None, config=None, name=None):
        """
        Initialize a pipeline.
        :param model: the internal model
        :param feature_transformers: the feature transformers
        """
        self.feature_transformers = feature_transformers
        self.model = model
        self.config = config
        self.name = name
        self.time = time.strftime("%Y%m%d-%H%M%S")

    def describe(self):
        init_info = ['future_seq_len', 'dt_col', 'target_col',
                     'extra_features_col', 'drop_missing']
        print("**** Initialization info ****")
        for info in init_info:
            print(info + ":", self.config[info])
        print("")

    def fit(self, input_df, validation_df=None, mc=False, epoch_num=20):
        x, y = self.feature_transformers.transform(input_df, is_train=True)
        if validation_df is not None and not validation_df.empty:
            validation_data = self.feature_transformers.transform(validation_df)
        else:
            validation_data = None
        new_config = {'epochs': epoch_num}
        self.model.fit_eval(x, y, validation_data, mc=mc, verbose=1, **new_config)
        print('Fit done!')

    def _is_val_df_valid(self, validation_df):
        df_not_empty = isinstance(validation_df, pd.DataFrame) \
            and not validation_df.empty
        df_list_not_empty = isinstance(validation_df, list) and validation_df \
            and not all([d.empty for d in validation_df])
        return validation_df is not None and (df_not_empty or df_list_not_empty)

    def _check_configs(self):
        required_configs = {'future_seq_len'}
        if not self.config.keys() & required_configs:
            raise ValueError("Missing required parameters in configuration. "
                             "Required parameters are: " + str(required_configs))
        default_config = {
            'dt_col': 'datetime',
            'target_col': 'value',
            'extra_features_col': None,
            'drop_missing': True,
            'past_seq_len': 2,
            'batch_size': 64,
            'lr': 0.001,
            'dropout': 0.2,
            'epochs': 10,
            'metric': 'mse'
        }
        for config, value in default_config.items():
            if config not in self.config:
                print("Config: '{}' is not specified. "
                      "A default value of {} will be used.".format(config, value))

    def get_default_configs(self):
        default_configs = {
            'dt_col': 'datetime',
            'target_col': 'value',
            'extra_features_col': None,
            'drop_missing': True,
            'future_seq_len': 1,
            'past_seq_len': 2,
            'batch_size': 64,
            'lr': 0.001,
            'dropout': 0.2,
            'epochs': 10,
            'metric': 'mean_squared_error'
        }
        print("**** default config: ****")
        for config in default_configs:
            print(config + ":", default_configs[config])
        print("You can change any fields in the default configs by passing them to "
              "fit_with_fixed_configs(). Otherwise, the default values will be used.")
        return default_configs

    def fit_with_fixed_configs(self, input_df, validation_df=None, mc=False,
                               **user_configs):
        """
        Fit the pipeline with fixed configs. The model will be trained from
        initialization with the hyper-parameters specified in the configs.
        The configs contain both identity configs (e.g. "future_seq_len",
        "dt_col", "target_col", "metric") and automl tunable configs
        (e.g. "past_seq_len", "batch_size"). We recommend calling
        get_default_configs() to see the names and default values of the
        configs you can specify.
        :param input_df: one data frame or a list of data frames
        :param validation_df: one data frame or a list of data frames
        :param user_configs: you can overwrite or add more configs with
            user_configs, e.g. "epochs"
        :return:
        """
        # self._check_configs()
        if self.config is None:
            self.config = self.get_default_configs()
        if user_configs is not None:
            self.config.update(user_configs)
        ft_id_config_set = {'future_seq_len', 'dt_col', 'target_col',
                            'extra_features_col', 'drop_missing'}
        ft_id_configs = {a: self.config[a] for a in ft_id_config_set}
        self.feature_transformers = TimeSequenceFeatureTransformer(**ft_id_configs)
        model_id_config_set = {'future_seq_len'}
        ft_id_configs = {a: self.config[a] for a in model_id_config_set}
        self.model = TimeSequenceModel(check_optional_config=False, **ft_id_configs)
        all_available_features = self.feature_transformers.get_feature_list(input_df)
        self.config.update({"selected_features": all_available_features})
        (x_train, y_train) = self.feature_transformers.fit_transform(input_df,
                                                                     **self.config)
        if self._is_val_df_valid(validation_df):
            validation_data = self.feature_transformers.transform(validation_df)
        else:
            validation_data = None
        self.model.fit_eval(x_train, y_train,
                            validation_data=validation_data,
                            mc=mc, verbose=1, **self.config)

    def evaluate(self, input_df, metrics=["mse"], multioutput='raw_values'):
        """
        Evaluate the pipeline.
        :param input_df:
        :param metrics: subset of ['mean_squared_error', 'r_square', 'sMAPE']
        :param multioutput: string in ['raw_values', 'uniform_average']
            'raw_values': returns a full set of errors in case of multioutput input.
            'uniform_average': errors of all outputs are averaged with uniform weight.
        :return:
        """
        if isinstance(metrics, str):
            metrics = [metrics]
        # if not isinstance(metrics, list):
        #     raise ValueError("Expected metrics to be a list!")
        x, y = self.feature_transformers.transform(input_df, is_train=True)
        y_pred = self.model.predict(x)
        if len(y_pred.shape) > 1 and y_pred.shape[1] == 1:
            multioutput = 'uniform_average'
        y_unscale, y_pred_unscale = self.feature_transformers.post_processing(
            input_df, y_pred, is_train=True)
        return [Evaluator.evaluate(m, y_unscale, y_pred_unscale,
                                   multioutput=multioutput) for m in metrics]

    def predict(self, input_df):
        """
        Predict on test data with the fitted pipeline.
        :param input_df:
        :return:
        """
        x, _ = self.feature_transformers.transform(input_df, is_train=False)
        y_pred = self.model.predict(x)
        y_output = self.feature_transformers.post_processing(input_df, y_pred,
                                                             is_train=False)
        return y_output

    def predict_with_uncertainty(self, input_df, n_iter=100):
        x, _ = self.feature_transformers.transform(input_df, is_train=False)
        y_pred, y_pred_uncertainty = self.model.predict_with_uncertainty(
            x=x, n_iter=n_iter)
        y_output = self.feature_transformers.post_processing(input_df, y_pred,
                                                             is_train=False)
        y_uncertainty = self.feature_transformers.unscale_uncertainty(
            y_pred_uncertainty)
        return y_output, y_uncertainty

    def save(self, ppl_file=None):
        """
        Save the pipeline to file; contains feature transformer, model and
        trial config.
        :param ppl_file:
        :return:
        """
        ppl_file = ppl_file or os.path.join(
            DEFAULT_PPL_DIR, "{}_{}.ppl".format(self.name, self.time))
        save_zip(ppl_file, self.feature_transformers, self.model, self.config)
        print("Pipeline is saved in", ppl_file)
        return ppl_file

    def config_save(self, config_file=None):
        """
        Save all configs to file.
        :param config_file:
        :return:
        """
        config_file = config_file or os.path.join(
            DEFAULT_CONFIG_DIR, "{}_{}.json".format(self.name, self.time))
        save_config(config_file, self.config, replace=True)
        return config_file
def setup_method(self, method):
    self.ft = TimeSequenceFeatureTransformer()
    self.create_data()
def create_feature_transformer(self):
    ft = TimeSequenceFeatureTransformer(self.future_seq_len,
                                        self.dt_col,
                                        self.target_col,
                                        self.extra_features_col,
                                        self.drop_missing)
    return ft
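# A shape-oriented sketch of the transformer's roll helpers used throughout
# these tests (shapes inferred from the assertions above; the import path is
# assumed from the analytics-zoo layout):
import numpy as np
import pandas as pd
from zoo.automl.feature.time_sequence import TimeSequenceFeatureTransformer

ft = TimeSequenceFeatureTransformer()
data = pd.DataFrame(np.random.randn(64, 4))
# _roll_train windows both features and targets:
# expect x.shape == (57, 6, 4) and y.shape == (57, 2), since 64 - 6 - 2 + 1 = 57
x, y = ft._roll_train(data, past_seq_len=6, future_seq_len=2)
# _roll_test windows features only: expect x_test.shape == (59, 6, 4)
x_test = ft._roll_test(data, past_seq_len=6)
print(x.shape, y.shape, x_test.shape)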
class TestMTNetKeras(ZooTestCase):

    def setup_method(self, method):
        tf.keras.backend.clear_session()
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()
        self.model = MTNetKeras()
        self.config = {"long_num": self.long_num,
                       "time_step": self.time_step,
                       "ar_window": np.random.randint(1, 3),
                       "cnn_height": np.random.randint(1, 3),
                       "epochs": 1}

    def teardown_method(self, method):
        pass

    def create_data(self):
        def gen_train_sample(data, past_seq_len, future_seq_len):
            data = pd.DataFrame(data)
            x, y = self.ft._roll_train(data,
                                       past_seq_len=past_seq_len,
                                       future_seq_len=future_seq_len)
            return x, y

        def gen_test_sample(data, past_seq_len):
            test_data = pd.DataFrame(data)
            x = self.ft._roll_test(test_data, past_seq_len=past_seq_len)
            return x

        self.long_num = 6
        self.time_step = 2
        look_back = (self.long_num + 1) * self.time_step
        look_forward = 1
        self.x_train, self.y_train = gen_train_sample(data=np.random.randn(64, 4),
                                                      past_seq_len=look_back,
                                                      future_seq_len=look_forward)
        self.x_val, self.y_val = gen_train_sample(data=np.random.randn(16, 4),
                                                  past_seq_len=look_back,
                                                  future_seq_len=look_forward)
        self.x_test = gen_test_sample(data=np.random.randn(16, 4),
                                      past_seq_len=look_back)

    def test_fit_evaluate(self):
        self.model.fit_eval(data=(self.x_train, self.y_train),
                            validation_data=(self.x_val, self.y_val),
                            **self.config)
        self.model.evaluate(self.x_val, self.y_val)

    def test_save_restore(self):
        self.model.fit_eval(data=(self.x_train, self.y_train),
                            validation_data=(self.x_val, self.y_val),
                            **self.config)
        y_pred = self.model.predict(self.x_test)
        assert y_pred.shape == (self.x_test.shape[0], self.y_train.shape[1])
        dirname = "tmp"
        restored_model = MTNetKeras()
        try:
            save(dirname, model=self.model)
            restore(dirname, model=restored_model, config=self.config)
            predict_after = restored_model.predict(self.x_test)
            # pass the message via err_msg; a bare trailing string after the
            # call would be dead code
            assert_array_almost_equal(
                y_pred, predict_after, decimal=2,
                err_msg="Prediction values are not the same after restore: "
                        "predict before is {}, and predict after is {}".format(
                            y_pred, predict_after))
            restored_model.fit_eval((self.x_train, self.y_train), epochs=1)
            restored_model.evaluate(self.x_val, self.y_val)
        finally:
            shutil.rmtree(dirname)

    def test_predict_with_uncertainty(self):
        self.model.fit_eval(data=(self.x_train, self.y_train),
                            validation_data=(self.x_val, self.y_val),
                            mc=True,
                            **self.config)
        pred, uncertainty = self.model.predict_with_uncertainty(self.x_test,
                                                                n_iter=2)
        assert pred.shape == (self.x_test.shape[0], self.y_train.shape[1])
        assert uncertainty.shape == pred.shape
        assert np.any(uncertainty)