# Imports assumed from the original analytics-zoo test modules; the exact
# module paths may differ in your checkout.
import os
import shutil
import tempfile
from unittest import TestCase

import numpy as np
import pandas as pd
import tensorflow as tf
from numpy.testing import assert_array_almost_equal

from test.zoo.pipeline.utils.test_utils import ZooTestCase
from zoo.automl.common.util import save, restore, load_config
from zoo.automl.feature.time_sequence import TimeSequenceFeatureTransformer
from zoo.automl.model.VanillaLSTM import VanillaLSTM
from zoo.automl.model.Seq2Seq import LSTMSeq2Seq
from zoo.automl.model.MTNet_keras import MTNetKeras
from zoo.zouwu.model.forecast import LSTMForecaster, MTNetForecaster


# Class header assumed: the following methods exercise VanillaLSTM and read
# as a single ZooTestCase subclass.
class TestVanillaLSTM(ZooTestCase):

    def setup_method(self, method):
        # super().setup_method(method)
        train_data = pd.DataFrame(data=np.random.randn(64, 4))
        val_data = pd.DataFrame(data=np.random.randn(16, 4))
        test_data = pd.DataFrame(data=np.random.randn(16, 4))
        future_seq_len = 1
        past_seq_len = 6

        # use roll method in time_sequence
        tsft = TimeSequenceFeatureTransformer()
        self.x_train, self.y_train = tsft._roll_train(
            train_data,
            past_seq_len=past_seq_len,
            future_seq_len=future_seq_len)
        self.x_val, self.y_val = tsft._roll_train(
            val_data,
            past_seq_len=past_seq_len,
            future_seq_len=future_seq_len)
        self.x_test = tsft._roll_test(test_data, past_seq_len=past_seq_len)
        self.config = {
            'epochs': 1,
            "lr": 0.001,
            "lstm_1_units": 16,
            "dropout_1": 0.2,
            "lstm_2_units": 10,
            "dropout_2": 0.2,
            "batch_size": 32,
        }
        self.model = VanillaLSTM(check_optional_config=False,
                                 future_seq_len=future_seq_len)
    def test_evaluate(self):
        train_data = pd.DataFrame(data=np.random.randn(64, 4))
        val_data = pd.DataFrame(data=np.random.randn(16, 4))
        future_seq_len = 1
        past_seq_len = 6

        # use roll method in time_sequence
        tsft = TimeSequenceFeatureTransformer()
        x_train, y_train = tsft._roll_train(train_data,
                                            past_seq_len=past_seq_len,
                                            future_seq_len=future_seq_len)
        x_val, y_val = tsft._roll_train(val_data,
                                        past_seq_len=past_seq_len,
                                        future_seq_len=future_seq_len)
        config = {
            'epochs': 1,
            "lr": 0.001,
            "lstm_1_units": 16,
            "dropout_1": 0.2,
            "lstm_2_units": 10,
            "dropout_2": 0.2,
            "batch_size": 32,
        }
        model = VanillaLSTM(check_optional_config=False,
                            future_seq_len=future_seq_len)
        model.fit_eval(x_train, y_train, **config)
        print("evaluate:", model.evaluate(x_val, y_val))
    def test_predict(self):
        train_data = pd.DataFrame(data=np.random.randn(64, 4))
        test_data = pd.DataFrame(data=np.random.randn(16, 4))
        future_seq_len = 1
        past_seq_len = 6

        # use roll method in time_sequence
        tsft = TimeSequenceFeatureTransformer()
        x_train, y_train = tsft._roll_train(train_data,
                                            past_seq_len=past_seq_len,
                                            future_seq_len=future_seq_len)
        x_test = tsft._roll_test(test_data, past_seq_len=past_seq_len)
        config = {
            'epochs': 2,
            "lr": 0.001,
            "lstm_1_units": 16,
            "dropout_1": 0.2,
            "lstm_2_units": 10,
            "dropout_2": 0.2,
            "batch_size": 32,
        }
        model = VanillaLSTM(check_optional_config=False,
                            future_seq_len=future_seq_len)
        model.fit_eval(x_train, y_train, **config)
        y_pred = model.predict(x_test)
        assert y_pred.shape == (x_test.shape[0], 1)
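    # Illustrative sketch (added): documents the tensor layout the private
    # rolling helpers are assumed to produce. The per-window dimensions
    # (past_seq_len timesteps x feature columns) follow from the Seq2Seq
    # assertions further below; the exact window count is deliberately not
    # asserted here since it is not pinned down by the original tests.
    def test_roll_shapes_sketch(self):
        data = pd.DataFrame(data=np.random.randn(64, 4))
        tsft = TimeSequenceFeatureTransformer()
        x, y = tsft._roll_train(data, past_seq_len=6, future_seq_len=1)
        assert x.ndim == 3               # (num_windows, past_seq_len, features)
        assert x.shape[1] == 6           # one window spans past_seq_len steps
        assert x.shape[2] == 4           # all four feature columns are kept
        assert y.shape[0] == x.shape[0]  # one target per window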
class TestZouwuModelMTNetForecaster(TestCase):

    def setUp(self):
        tf.keras.backend.clear_session()
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()

    def tearDown(self):
        pass

    def create_data(self):
        def gen_train_sample(data, past_seq_len, future_seq_len):
            data = pd.DataFrame(data)
            x, y = self.ft._roll_train(data,
                                       past_seq_len=past_seq_len,
                                       future_seq_len=future_seq_len)
            return x, y

        def gen_test_sample(data, past_seq_len):
            test_data = pd.DataFrame(data)
            x = self.ft._roll_test(test_data, past_seq_len=past_seq_len)
            return x

        self.long_num = 6
        self.time_step = 2
        look_back = (self.long_num + 1) * self.time_step
        look_forward = 1
        self.x_train, self.y_train = gen_train_sample(
            data=np.random.randn(64, 4),
            past_seq_len=look_back,
            future_seq_len=look_forward)
        self.x_val, self.y_val = gen_train_sample(data=np.random.randn(16, 4),
                                                  past_seq_len=look_back,
                                                  future_seq_len=look_forward)
        self.x_test = gen_test_sample(data=np.random.randn(16, 4),
                                      past_seq_len=look_back)

    def test_forecast_mtnet(self):
        # TODO hacking to fix a bug
        target_dim = 1
        model = MTNetForecaster(target_dim=target_dim,
                                feature_dim=self.x_train.shape[-1],
                                long_series_num=self.long_num,
                                series_length=self.time_step)
        x_train_long, x_train_short = model.preprocess_input(self.x_train)
        x_val_long, x_val_short = model.preprocess_input(self.x_val)
        x_test_long, x_test_short = model.preprocess_input(self.x_test)
        model.fit([x_train_long, x_train_short], self.y_train,
                  validation_data=([x_val_long, x_val_short], self.y_val),
                  batch_size=32,
                  distributed=False)
        assert model.evaluate([x_val_long, x_val_short], self.y_val)
        predict_result = model.predict([x_test_long, x_test_short])
        assert predict_result.shape == (self.x_test.shape[0], target_dim)
class TestZouwuModelLSTMForecaster(TestCase):

    def setUp(self):
        tf.keras.backend.clear_session()
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()

    def tearDown(self):
        pass

    def create_data(self):
        def gen_train_sample(data, past_seq_len, future_seq_len):
            data = pd.DataFrame(data)
            x, y = self.ft._roll_train(data,
                                       past_seq_len=past_seq_len,
                                       future_seq_len=future_seq_len)
            return x, y

        def gen_test_sample(data, past_seq_len):
            test_data = pd.DataFrame(data)
            x = self.ft._roll_test(test_data, past_seq_len=past_seq_len)
            return x

        self.long_num = 6
        self.time_step = 2
        look_back = (self.long_num + 1) * self.time_step
        look_forward = 1
        self.x_train, self.y_train = gen_train_sample(
            data=np.random.randn(64, 4),
            past_seq_len=look_back,
            future_seq_len=look_forward)
        self.x_val, self.y_val = gen_train_sample(data=np.random.randn(16, 4),
                                                  past_seq_len=look_back,
                                                  future_seq_len=look_forward)
        self.x_test = gen_test_sample(data=np.random.randn(16, 4),
                                      past_seq_len=look_back)

    def test_forecast_lstm(self):
        # TODO hacking to fix a bug
        model = LSTMForecaster(target_dim=1,
                               feature_dim=self.x_train.shape[-1])
        model.fit(self.x_train, self.y_train,
                  validation_data=(self.x_val, self.y_val),
                  batch_size=8,
                  distributed=False)
        model.evaluate(self.x_val, self.y_val)
        model.predict(self.x_test)
    # Logically this save/restore test belongs with the VanillaLSTM tests
    # above (TestVanillaLSTM); it is kept here in its original position.
    def test_save_restore(self):
        train_data = pd.DataFrame(data=np.random.randn(64, 4))
        test_data = pd.DataFrame(data=np.random.randn(16, 4))
        future_seq_len = 1
        past_seq_len = 6

        # use roll method in time_sequence
        tsft = TimeSequenceFeatureTransformer()
        x_train, y_train = tsft._roll_train(train_data,
                                            past_seq_len=past_seq_len,
                                            future_seq_len=future_seq_len)
        x_test = tsft._roll_test(test_data, past_seq_len=past_seq_len)
        config = {
            'epochs': 2,
            "lr": 0.001,
            "lstm_1_units": 16,
            "dropout_1": 0.2,
            "lstm_2_units": 10,
            "dropout_2": 0.2,
            "batch_size": 32,
        }
        dirname = tempfile.mkdtemp(prefix="automl_test_vanilla")
        try:
            model = VanillaLSTM(check_optional_config=False,
                                future_seq_len=future_seq_len)
            model.fit_eval(x_train, y_train, **config)
            predict_before = model.predict(x_test)
            model_path = os.path.join(dirname, "testmodel.h5")
            config_path = os.path.join(dirname, "local_config.json")
            model.save(model_path=model_path, config_path=config_path)

            local_config = load_config(config_path)
            config.update(local_config)
            model.restore(model_path=model_path, **config)
            predict_after = model.predict(x_test)
            assert np.allclose(predict_before, predict_after)
        finally:
            shutil.rmtree(dirname)
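    # For context: load_config above presumably just reads the saved JSON
    # back into a dict (an assumption about zoo.automl, not its verbatim
    # code). A minimal stand-in would be:
    #
    #     import json
    #
    #     def load_config(config_path):
    #         with open(config_path) as f:
    #             return json.load(f)
    #
    # config.update(local_config) then merges the saved hyperparameters back
    # in so that model.restore() can rebuild the network before loading the
    # .h5 weights.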
class TestSeq2Seq(ZooTestCase):

    def setup_method(self, method):
        # super().setup_method(method)
        self.train_data = pd.DataFrame(data=np.random.randn(64, 4))
        self.val_data = pd.DataFrame(data=np.random.randn(16, 4))
        self.test_data = pd.DataFrame(data=np.random.randn(16, 4))
        self.past_seq_len = 6
        self.future_seq_len_1 = 1
        self.future_seq_len_2 = 2

        # use roll method in time_sequence
        self.feat = TimeSequenceFeatureTransformer()
        self.config = {'batch_size': 32, 'epochs': 1}
        self.model_1 = LSTMSeq2Seq(check_optional_config=False,
                                   future_seq_len=self.future_seq_len_1)
        self.model_2 = LSTMSeq2Seq(check_optional_config=False,
                                   future_seq_len=self.future_seq_len_2)
        self.fitted = False
        self.predict_1 = None
        self.predict_2 = None

    def teardown_method(self, method):
        pass

    def test_fit_eval_1(self):
        x_train_1, y_train_1 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_1)
        print("fit_eval_future_seq_len_1:",
              self.model_1.fit_eval(x_train_1, y_train_1, **self.config))
        assert self.model_1.past_seq_len == 6
        assert self.model_1.feature_num == 4
        assert self.model_1.future_seq_len == 1
        assert self.model_1.target_col_num == 1

    def test_fit_eval_2(self):
        x_train_2, y_train_2 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_2)
        print("fit_eval_future_seq_len_2:",
              self.model_2.fit_eval(x_train_2, y_train_2, **self.config))
        assert self.model_2.future_seq_len == 2
        self.fitted = True

    def test_evaluate_1(self):
        x_train_1, y_train_1 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_1)
        x_val_1, y_val_1 = self.feat._roll_train(
            self.val_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_1)
        self.model_1.fit_eval(x_train_1, y_train_1, **self.config)
        print("evaluate_future_seq_len_1:",
              self.model_1.evaluate(x_val_1, y_val_1, metric=['mse', 'r2']))

    def test_evaluate_2(self):
        x_train_2, y_train_2 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_2)
        x_val_2, y_val_2 = self.feat._roll_train(
            self.val_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_2)
        self.model_2.fit_eval(x_train_2, y_train_2, **self.config)
        print("evaluate_future_seq_len_2:",
              self.model_2.evaluate(x_val_2, y_val_2, metric=['mse', 'r2']))

    def test_predict_1(self):
        x_train_1, y_train_1 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_1)
        x_test_1 = self.feat._roll_test(self.test_data,
                                        past_seq_len=self.past_seq_len)
        self.model_1.fit_eval(x_train_1, y_train_1, **self.config)
        predict_1 = self.model_1.predict(x_test_1)
        assert predict_1.shape == (x_test_1.shape[0], self.future_seq_len_1)

    def test_predict_2(self):
        x_train_2, y_train_2 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_2)
        x_test_2 = self.feat._roll_test(self.test_data,
                                        past_seq_len=self.past_seq_len)
        self.model_2.fit_eval(x_train_2, y_train_2, **self.config)
        predict_2 = self.model_2.predict(x_test_2)
        assert predict_2.shape == (x_test_2.shape[0], self.future_seq_len_2)

    def test_save_restore_1(self):
        x_train_1, y_train_1 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_1)
        x_test_1 = self.feat._roll_test(self.test_data,
                                        past_seq_len=self.past_seq_len)
        self.model_1.fit_eval(x_train_1, y_train_1, **self.config)
        predict_1_before = self.model_1.predict(x_test_1)
        new_model_1 = LSTMSeq2Seq(check_optional_config=False)
        dirname = tempfile.mkdtemp(prefix="automl_test_feature")
        try:
            save(dirname, model=self.model_1)
            restore(dirname, model=new_model_1, config=self.config)
            predict_1_after = new_model_1.predict(x_test_1)
            assert_array_almost_equal(
                predict_1_before, predict_1_after, decimal=2,
                err_msg="Prediction values are not the same after restore: "
                        "predict before is {}, and predict after is {}"
                        .format(predict_1_before, predict_1_after))
            new_config = {'epochs': 1}
            new_model_1.fit_eval(x_train_1, y_train_1, **new_config)
        finally:
            shutil.rmtree(dirname)

    def test_save_restore_2(self):
        x_train_2, y_train_2 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_2)
        x_test_2 = self.feat._roll_test(self.test_data,
                                        past_seq_len=self.past_seq_len)
        self.model_2.fit_eval(x_train_2, y_train_2, **self.config)
        predict_2_before = self.model_2.predict(x_test_2)
        new_model_2 = LSTMSeq2Seq(check_optional_config=False)
        dirname = tempfile.mkdtemp(prefix="automl_test_feature")
        try:
            save(dirname, model=self.model_2)
            restore(dirname, model=new_model_2, config=self.config)
            predict_2_after = new_model_2.predict(x_test_2)
            assert_array_almost_equal(
                predict_2_before, predict_2_after, decimal=2,
                err_msg="Prediction values are not the same after restore: "
                        "predict before is {}, and predict after is {}"
                        .format(predict_2_before, predict_2_after))
            new_config = {'epochs': 2}
            new_model_2.fit_eval(x_train_2, y_train_2, **new_config)
        finally:
            shutil.rmtree(dirname)

    def test_predict_with_uncertainty(self):
        x_train_2, y_train_2 = self.feat._roll_train(
            self.train_data,
            past_seq_len=self.past_seq_len,
            future_seq_len=self.future_seq_len_2)
        x_test_2 = self.feat._roll_test(self.test_data,
                                        past_seq_len=self.past_seq_len)
        self.model_2.fit_eval(x_train_2, y_train_2, mc=True, **self.config)
        prediction, uncertainty = self.model_2.predict_with_uncertainty(
            x_test_2, n_iter=2)
        assert prediction.shape == (x_test_2.shape[0], self.future_seq_len_2)
        assert uncertainty.shape == (x_test_2.shape[0], self.future_seq_len_2)
        assert np.any(uncertainty)

        new_model_2 = LSTMSeq2Seq(check_optional_config=False)
        dirname = tempfile.mkdtemp(prefix="automl_test_feature")
        try:
            save(dirname, model=self.model_2)
            restore(dirname, model=new_model_2, config=self.config)
            prediction, uncertainty = new_model_2.predict_with_uncertainty(
                x_test_2, n_iter=2)
            assert prediction.shape == (x_test_2.shape[0],
                                        self.future_seq_len_2)
            assert uncertainty.shape == (x_test_2.shape[0],
                                         self.future_seq_len_2)
            assert np.any(uncertainty)
        finally:
            shutil.rmtree(dirname)
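    # Note on test_predict_with_uncertainty (added): fitting with mc=True is
    # what keeps dropout sampling active at inference time (Monte Carlo
    # dropout), so predict_with_uncertainty(x, n_iter=k) presumably averages
    # k stochastic forward passes into `prediction` and reports their spread
    # as `uncertainty`, which is why the test asserts np.any(uncertainty).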
class TestZouwuModelForecast(ZooTestCase):

    def setup_method(self, method):
        tf.keras.backend.clear_session()
        # super(TestZouwuModelForecast, self).setup_method(method)
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()

    def teardown_method(self, method):
        pass

    def create_data(self):
        def gen_train_sample(data, past_seq_len, future_seq_len):
            data = pd.DataFrame(data)
            x, y = self.ft._roll_train(data,
                                       past_seq_len=past_seq_len,
                                       future_seq_len=future_seq_len)
            return x, y

        def gen_test_sample(data, past_seq_len):
            test_data = pd.DataFrame(data)
            x = self.ft._roll_test(test_data, past_seq_len=past_seq_len)
            return x

        self.long_num = 6
        self.time_step = 2
        look_back = (self.long_num + 1) * self.time_step
        look_forward = 1
        self.x_train, self.y_train = gen_train_sample(
            data=np.random.randn(64, 4),
            past_seq_len=look_back,
            future_seq_len=look_forward)
        self.x_val, self.y_val = gen_train_sample(data=np.random.randn(16, 4),
                                                  past_seq_len=look_back,
                                                  future_seq_len=look_forward)
        self.x_test = gen_test_sample(data=np.random.randn(16, 4),
                                      past_seq_len=look_back)

    def test_forecast_lstm(self):
        # TODO hacking to fix a bug
        model = LSTMForecaster(target_dim=1,
                               feature_dim=self.x_train.shape[-1])
        model.fit(self.x_train, self.y_train,
                  validation_data=(self.x_val, self.y_val),
                  batch_size=8,
                  distributed=False)
        model.evaluate(self.x_val, self.y_val)
        model.predict(self.x_test)

    def test_forecast_mtnet(self):
        # TODO hacking to fix a bug
        model = MTNetForecaster(target_dim=1,
                                feature_dim=self.x_train.shape[-1],
                                long_series_num=self.long_num,
                                series_length=self.time_step)
        x_train_long, x_train_short = model.preprocess_input(self.x_train)
        x_val_long, x_val_short = model.preprocess_input(self.x_val)
        x_test_long, x_test_short = model.preprocess_input(self.x_test)
        model.fit([x_train_long, x_train_short], self.y_train,
                  validation_data=([x_val_long, x_val_short], self.y_val),
                  batch_size=32,
                  distributed=False)
        model.evaluate([x_val_long, x_val_short], self.y_val)
        model.predict([x_test_long, x_test_short])

    def test_forecast_tcmf(self):
        from zoo.zouwu.model.forecast import TCMFForecaster
        import tempfile
        model = TCMFForecaster(max_y_iterations=1,
                               init_FX_epoch=1,
                               max_FX_epoch=1,
                               max_TCN_epoch=1,
                               alt_iters=2)
        horizon = np.random.randint(1, 50)
        # construct data
        id = np.arange(300)
        data = np.random.rand(300, 480)
        input = dict({'data': data})
        with self.assertRaises(Exception) as context:
            model.fit(input)
        self.assertTrue("key `y` doesn't exist in x" in str(context.exception))
        input = dict({'id': id, 'y': data})
        with self.assertRaises(Exception) as context:
            model.is_distributed()
        self.assertTrue('You should run fit before calling is_distributed()'
                        in str(context.exception))
        model.fit(input)
        assert not model.is_distributed()
        with self.assertRaises(Exception) as context:
            model.fit(input)
        self.assertTrue('This model has already been fully trained' in str(
            context.exception))
        with self.assertRaises(Exception) as context:
            model.fit(input, incremental=True)
        self.assertTrue(
            'NotImplementedError' in context.exception.__class__.__name__)
        with tempfile.TemporaryDirectory() as tempdirname:
            model.save(tempdirname)
            loaded_model = TCMFForecaster.load(tempdirname, distributed=False)
        yhat = model.predict(x=None, horizon=horizon)
        yhat_loaded = loaded_model.predict(x=None, horizon=horizon)
        yhat_id = yhat_loaded["id"]
        assert (yhat_id == id).all()
        yhat = yhat["prediction"]
        yhat_loaded = yhat_loaded["prediction"]
        assert yhat.shape == (300, horizon)
        assert (yhat == yhat_loaded).all()
        target_value = np.random.rand(300, horizon)
        target_value = dict({"y": target_value})
        model.evaluate(x=None, target_value=target_value, metric=['mse'])

    def test_forecast_tcmf_without_id(self):
        from zoo.zouwu.model.forecast import TCMFForecaster
        import tempfile
        model = TCMFForecaster(max_y_iterations=1,
                               init_FX_epoch=1,
                               max_FX_epoch=1,
                               max_TCN_epoch=1,
                               alt_iters=2)
        horizon = np.random.randint(1, 50)
        # construct data
        id = np.arange(200)
        data = np.random.rand(300, 480)
        input = dict({'y': "abc"})
        with self.assertRaises(Exception) as context:
            model.fit(input)
        self.assertTrue(
            "the value of y should be an ndarray" in str(context.exception))
        input = dict({'id': id, 'y': data})
        with self.assertRaises(Exception) as context:
            model.fit(input)
        self.assertTrue(
            "the length of the id array should be equal to the number of"
            in str(context.exception))
        input = dict({'y': data})
        model.fit(input)
        assert not model.is_distributed()
        with self.assertRaises(Exception) as context:
            model.fit(input)
        self.assertTrue('This model has already been fully trained' in str(
            context.exception))
        with tempfile.TemporaryDirectory() as tempdirname:
            model.save(tempdirname)
            loaded_model = TCMFForecaster.load(tempdirname, distributed=False)
        yhat = model.predict(x=None, horizon=horizon)
        yhat_loaded = loaded_model.predict(x=None, horizon=horizon)
        assert "id" not in yhat_loaded
        yhat = yhat["prediction"]
        yhat_loaded = yhat_loaded["prediction"]
        assert yhat.shape == (300, horizon)
        assert (yhat == yhat_loaded).all()
        target_value = np.random.rand(300, horizon)
        target_value_fake = dict({"data": target_value})
        with self.assertRaises(Exception) as context:
            model.evaluate(x=None, target_value=target_value_fake,
                           metric=['mse'])
        self.assertTrue("key y doesn't exist in y" in str(context.exception))
        target_value = dict({"y": target_value})
        model.evaluate(x=None, target_value=target_value, metric=['mse'])

    def test_forecast_tcmf_xshards(self):
        from zoo.zouwu.model.forecast import TCMFForecaster
        from zoo.orca import OrcaContext
        import zoo.orca.data.pandas
        import tempfile
        OrcaContext.pandas_read_backend = "pandas"

        def preprocessing(df, id_name, y_name):
            id = df.index
            data = df.to_numpy()
            result = dict({id_name: id, y_name: data})
            return result

        def postprocessing(pred_results, output_dt_col_name):
            id_arr = pred_results["id"]
            pred_results = pred_results["prediction"]
            pred_results = np.concatenate(
                (np.expand_dims(id_arr, axis=1), pred_results), axis=1)
            final_df = pd.DataFrame(pred_results,
                                    columns=["id"] + output_dt_col_name)
            final_df.id = final_df.id.astype("int")
            final_df = final_df.set_index("id")
            final_df.columns.name = "datetime"
            final_df = final_df.unstack().reset_index().rename(
                {0: "prediction"}, axis=1)
            return final_df

        def get_pred(d):
            return d["prediction"]

        model = TCMFForecaster(max_y_iterations=1,
                               init_FX_epoch=1,
                               max_FX_epoch=1,
                               max_TCN_epoch=1,
                               alt_iters=2)
        with tempfile.NamedTemporaryFile() as temp:
            data = np.random.rand(300, 480)
            df = pd.DataFrame(data)
            df.to_csv(temp.name)
            shard = zoo.orca.data.pandas.read_csv(temp.name)
        shard.cache()
        shard_train = shard.transform_shard(preprocessing, 'id', 'data')
        with self.assertRaises(Exception) as context:
            model.fit(shard_train)
        self.assertTrue("key `y` doesn't exist in x" in str(context.exception))
        shard_train = shard.transform_shard(preprocessing, 'cid', 'y')
        with self.assertRaises(Exception) as context:
            model.fit(shard_train)
        self.assertTrue(
            "key `id` doesn't exist in x" in str(context.exception))
        with self.assertRaises(Exception) as context:
            model.is_distributed()
        self.assertTrue('You should run fit before calling is_distributed()'
                        in str(context.exception))
        shard_train = shard.transform_shard(preprocessing, 'id', 'y')
        model.fit(shard_train)
        assert model.is_distributed()
        with self.assertRaises(Exception) as context:
            model.fit(shard_train)
        self.assertTrue('This model has already been fully trained' in str(
            context.exception))
        with self.assertRaises(Exception) as context:
            model.fit(shard_train, incremental=True)
        self.assertTrue(
            'NotImplementedError' in context.exception.__class__.__name__)
        with tempfile.TemporaryDirectory() as tempdirname:
            model.save(tempdirname + "/model")
            loaded_model = TCMFForecaster.load(tempdirname + "/model",
                                               distributed=True)
        horizon = np.random.randint(1, 50)
        yhat_shard_origin = model.predict(x=None, horizon=horizon)
        yhat_list_origin = yhat_shard_origin.collect()
        yhat_list_origin = list(map(get_pred, yhat_list_origin))
        yhat_shard = loaded_model.predict(x=None, horizon=horizon)
        yhat_list = yhat_shard.collect()
        yhat_list = list(map(get_pred, yhat_list))
        yhat_origin = np.concatenate(yhat_list_origin)
        yhat = np.concatenate(yhat_list)
        assert yhat.shape == (300, horizon)
        assert (yhat == yhat_origin).all()
        output_dt_col_name = pd.date_range(start='2020-05-01',
                                           periods=horizon,
                                           freq='H').to_list()
        yhat_df_shards = yhat_shard.transform_shard(postprocessing,
                                                    output_dt_col_name)
        final_df_list = yhat_df_shards.collect()
        final_df = pd.concat(final_df_list)
        final_df.sort_values("datetime", inplace=True)
        assert final_df.shape == (300 * horizon, 3)
        OrcaContext.pandas_read_backend = "spark"
class TestZouwuModelForecast(ZooTestCase):

    def setup_method(self, method):
        tf.keras.backend.clear_session()
        # super(TestZouwuModelForecast, self).setup_method(method)
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()

    def teardown_method(self, method):
        pass

    def create_data(self):
        def gen_train_sample(data, past_seq_len, future_seq_len):
            data = pd.DataFrame(data)
            x, y = self.ft._roll_train(data,
                                       past_seq_len=past_seq_len,
                                       future_seq_len=future_seq_len)
            return x, y

        def gen_test_sample(data, past_seq_len):
            test_data = pd.DataFrame(data)
            x = self.ft._roll_test(test_data, past_seq_len=past_seq_len)
            return x

        self.long_num = 6
        self.time_step = 2
        look_back = (self.long_num + 1) * self.time_step
        look_forward = 1
        self.x_train, self.y_train = gen_train_sample(
            data=np.random.randn(64, 4),
            past_seq_len=look_back,
            future_seq_len=look_forward)
        self.x_val, self.y_val = gen_train_sample(data=np.random.randn(16, 4),
                                                  past_seq_len=look_back,
                                                  future_seq_len=look_forward)
        self.x_test = gen_test_sample(data=np.random.randn(16, 4),
                                      past_seq_len=look_back)

    def test_forecast_lstm(self):
        # TODO hacking to fix a bug
        model = LSTMForecaster(horizon=1,
                               feature_dim=self.x_train.shape[-1])
        model.fit(self.x_train, self.y_train,
                  validation_data=(self.x_val, self.y_val),
                  batch_size=8,
                  distributed=False)
        model.evaluate(self.x_val, self.y_val)
        model.predict(self.x_test)

    def test_forecast_mtnet(self):
        # TODO hacking to fix a bug
        model = MTNetForecaster(horizon=1,
                                feature_dim=self.x_train.shape[-1],
                                lb_long_steps=self.long_num,
                                lb_long_stepsize=self.time_step)
        x_train_long, x_train_short = model.preprocess_input(self.x_train)
        x_val_long, x_val_short = model.preprocess_input(self.x_val)
        x_test_long, x_test_short = model.preprocess_input(self.x_test)
        model.fit([x_train_long, x_train_short], self.y_train,
                  validation_data=([x_val_long, x_val_short], self.y_val),
                  batch_size=32,
                  distributed=False)
        model.evaluate([x_val_long, x_val_short], self.y_val)
        model.predict([x_test_long, x_test_short])
class TestZouwuModelForecast(ZooTestCase):

    def setup_method(self, method):
        tf.keras.backend.clear_session()
        # super(TestZouwuModelForecast, self).setup_method(method)
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()

    def teardown_method(self, method):
        pass

    def create_data(self):
        def gen_train_sample(data, past_seq_len, future_seq_len):
            data = pd.DataFrame(data)
            x, y = self.ft._roll_train(data,
                                       past_seq_len=past_seq_len,
                                       future_seq_len=future_seq_len)
            return x, y

        def gen_test_sample(data, past_seq_len):
            test_data = pd.DataFrame(data)
            x = self.ft._roll_test(test_data, past_seq_len=past_seq_len)
            return x

        self.long_num = 6
        self.time_step = 2
        look_back = (self.long_num + 1) * self.time_step
        look_forward = 1
        self.x_train, self.y_train = gen_train_sample(
            data=np.random.randn(64, 4),
            past_seq_len=look_back,
            future_seq_len=look_forward)
        self.x_val, self.y_val = gen_train_sample(data=np.random.randn(16, 4),
                                                  past_seq_len=look_back,
                                                  future_seq_len=look_forward)
        self.x_test = gen_test_sample(data=np.random.randn(16, 4),
                                      past_seq_len=look_back)

    def test_forecast_lstm(self):
        # TODO hacking to fix a bug
        model = LSTMForecaster(target_dim=1,
                               feature_dim=self.x_train.shape[-1])
        model.fit(self.x_train, self.y_train,
                  validation_data=(self.x_val, self.y_val),
                  batch_size=8,
                  distributed=False)
        model.evaluate(self.x_val, self.y_val)
        model.predict(self.x_test)

    def test_forecast_mtnet(self):
        # TODO hacking to fix a bug
        model = MTNetForecaster(target_dim=1,
                                feature_dim=self.x_train.shape[-1],
                                lb_long_steps=self.long_num,
                                lb_long_stepsize=self.time_step)
        x_train_long, x_train_short = model.preprocess_input(self.x_train)
        x_val_long, x_val_short = model.preprocess_input(self.x_val)
        x_test_long, x_test_short = model.preprocess_input(self.x_test)
        model.fit([x_train_long, x_train_short], self.y_train,
                  validation_data=([x_val_long, x_val_short], self.y_val),
                  batch_size=32,
                  distributed=False)
        model.evaluate([x_val_long, x_val_short], self.y_val)
        model.predict([x_test_long, x_test_short])

    def test_forecast_tcmf(self):
        from zoo.zouwu.model.forecast import TCMFForecaster
        model = TCMFForecaster(max_y_iterations=1,
                               init_FX_epoch=1,
                               max_FX_epoch=1,
                               max_TCN_epoch=1,
                               alt_iters=2)
        x = np.random.rand(300, 480)
        model.fit(x)
        model.predict(x=None, horizon=24)
        target_value = np.random.rand(300, 24)
        model.evaluate(x=None, target_value=target_value, metric=['mse'])
class TestMTNetKeras(ZooTestCase):

    def setup_method(self, method):
        tf.keras.backend.clear_session()
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()
        self.model = MTNetKeras()
        self.config = {"long_num": self.long_num,
                       "time_step": self.time_step,
                       "ar_window": np.random.randint(1, 3),
                       "cnn_height": np.random.randint(1, 3),
                       "epochs": 1}

    def teardown_method(self, method):
        pass

    def create_data(self):
        def gen_train_sample(data, past_seq_len, future_seq_len):
            data = pd.DataFrame(data)
            x, y = self.ft._roll_train(data,
                                       past_seq_len=past_seq_len,
                                       future_seq_len=future_seq_len)
            return x, y

        def gen_test_sample(data, past_seq_len):
            test_data = pd.DataFrame(data)
            x = self.ft._roll_test(test_data, past_seq_len=past_seq_len)
            return x

        self.long_num = 6
        self.time_step = 2
        look_back = (self.long_num + 1) * self.time_step
        look_forward = 1
        self.x_train, self.y_train = gen_train_sample(
            data=np.random.randn(64, 4),
            past_seq_len=look_back,
            future_seq_len=look_forward)
        self.x_val, self.y_val = gen_train_sample(data=np.random.randn(16, 4),
                                                  past_seq_len=look_back,
                                                  future_seq_len=look_forward)
        self.x_test = gen_test_sample(data=np.random.randn(16, 4),
                                      past_seq_len=look_back)

    def test_fit_evaluate(self):
        self.model.fit_eval(self.x_train, self.y_train,
                            validation_data=(self.x_val, self.y_val),
                            **self.config)
        self.model.evaluate(self.x_val, self.y_val)

    def test_save_restore(self):
        self.model.fit_eval(self.x_train, self.y_train,
                            validation_data=(self.x_val, self.y_val),
                            **self.config)
        y_pred = self.model.predict(self.x_test)
        assert y_pred.shape == (self.x_test.shape[0], self.y_train.shape[1])
        dirname = "tmp"
        restored_model = MTNetKeras()
        try:
            save(dirname, model=self.model)
            restore(dirname, model=restored_model, config=self.config)
            predict_after = restored_model.predict(self.x_test)
            assert_array_almost_equal(
                y_pred, predict_after, decimal=2,
                err_msg="Prediction values are not the same after restore: "
                        "predict before is {}, and predict after is {}"
                        .format(y_pred, predict_after))
            restored_model.fit_eval(self.x_train, self.y_train, epochs=1)
            restored_model.evaluate(self.x_val, self.y_val)
        finally:
            shutil.rmtree(dirname)

    def test_predict_with_uncertainty(self):
        self.model.fit_eval(self.x_train, self.y_train,
                            validation_data=(self.x_val, self.y_val),
                            mc=True,
                            **self.config)
        pred, uncertainty = self.model.predict_with_uncertainty(self.x_test,
                                                                n_iter=2)
        assert pred.shape == (self.x_test.shape[0], self.y_train.shape[1])
        assert uncertainty.shape == pred.shape
        assert np.any(uncertainty)
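# Conventional pytest entry point; an addition, since the original chunks do
# not show how these suites are launched.
if __name__ == "__main__":
    import pytest
    pytest.main([__file__])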