예제 #1
0
    def setup_method(self, method):
        """Prepare random data frames, sequence settings and two unfitted models.

        Creates three random 4-column data frames (64 training rows, 16
        validation rows, 16 test rows), a feature transformer, a minimal
        fit config and one LSTMSeq2Seq model per future sequence length.

        :param method: the test method about to run (unused here)
        """
        column_count = 4
        self.train_data = pd.DataFrame(data=np.random.randn(64, column_count))
        self.val_data = pd.DataFrame(data=np.random.randn(16, column_count))
        self.test_data = pd.DataFrame(data=np.random.randn(16, column_count))

        self.past_seq_len, self.future_seq_len_1, self.future_seq_len_2 = 6, 1, 2

        # The transformer supplies the roll methods for time sequences.
        self.feat = TimeSequenceFeatureTransformer()

        self.config = dict(batch_size=32, epochs=1)

        shared_model_kwargs = {'check_optional_config': False}
        self.model_1 = LSTMSeq2Seq(future_seq_len=self.future_seq_len_1,
                                   **shared_model_kwargs)
        self.model_2 = LSTMSeq2Seq(future_seq_len=self.future_seq_len_2,
                                   **shared_model_kwargs)

        # Bookkeeping attributes initialised to their "nothing done yet" state.
        self.fitted = False
        self.predict_1 = self.predict_2 = None
class TestZouwuModelMTNetForecaster(TestCase):
    """Smoke test that fits, evaluates and predicts with MTNetForecaster on random data."""

    def setUp(self):
        """Clear the Keras backend session, create the transformer and call create_data()."""
        tf.keras.backend.clear_session()
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()

    def tearDown(self):
        """Nothing to clean up."""

    def create_data(self):
        """Roll random 4-column series into train, validation and test samples.

        The look-back window is (long_num + 1) * time_step rows and the
        look-forward horizon is one step.
        """
        self.long_num = 6
        self.time_step = 2
        look_back = (self.long_num + 1) * self.time_step
        look_forward = 1

        def rolled_train(row_count):
            # Random frame -> (x, y) samples via the transformer's roll helper.
            frame = pd.DataFrame(np.random.randn(row_count, 4))
            return self.ft._roll_train(frame,
                                       past_seq_len=look_back,
                                       future_seq_len=look_forward)

        self.x_train, self.y_train = rolled_train(64)
        self.x_val, self.y_val = rolled_train(16)

        test_frame = pd.DataFrame(np.random.randn(16, 4))
        self.x_test = self.ft._roll_test(test_frame, past_seq_len=look_back)

    def test_forecast_mtnet(self):
        """Fit on the rolled data, then check evaluate() is truthy and predict() has the expected shape."""
        # TODO hacking to fix a bug
        target_dim = 1
        forecaster = MTNetForecaster(target_dim=target_dim,
                                     feature_dim=self.x_train.shape[-1],
                                     long_series_num=self.long_num,
                                     series_length=self.time_step)

        # preprocess_input returns a (long, short) pair for each input set.
        train_inputs = list(forecaster.preprocess_input(self.x_train))
        val_inputs = list(forecaster.preprocess_input(self.x_val))
        test_inputs = list(forecaster.preprocess_input(self.x_test))

        forecaster.fit(train_inputs,
                       self.y_train,
                       validation_data=(val_inputs, self.y_val),
                       batch_size=32,
                       distributed=False)
        assert forecaster.evaluate(val_inputs, self.y_val)

        prediction = forecaster.predict(test_inputs)
        expected_shape = (self.x_test.shape[0], target_dim)
        assert prediction.shape == expected_shape
예제 #3
0
 def setup_method(self, method):
     """Reset Keras, build the data and model, and assemble the fit config.

     The config combines self.long_num and self.time_step with randomly
     drawn "ar_window" and "cnn_height" values (np.random.randint(1, 3))
     and a single training epoch.

     :param method: the test method about to run (unused here)
     """
     tf.keras.backend.clear_session()
     self.ft = TimeSequenceFeatureTransformer()
     self.create_data()
     self.model = MTNetKeras()

     ar_window = np.random.randint(1, 3)
     cnn_height = np.random.randint(1, 3)
     self.config = dict(long_num=self.long_num,
                        time_step=self.time_step,
                        ar_window=ar_window,
                        cnn_height=cnn_height,
                        epochs=1)
예제 #4
0
    def fit_with_fixed_configs(self,
                               input_df,
                               validation_df=None,
                               mc=False,
                               **user_configs):
        """
        Fit the pipeline with a fixed set of configs. A new feature transformer and a new
        model are created and trained with the hyper-parameters found in the configs.
        The configs contain both identity configs
        (Eg. "future_seq_len", "dt_col", "target_col", "metric") and automl tunable configs
        (Eg. "past_seq_len", "batch_size").
        We recommend calling get_default_configs to see the names and default values of the
        configs you can specify.
        :param input_df: one data frame or a list of data frames
        :param validation_df: one data frame or a list of data frames
        :param mc: forwarded unchanged to the model's fit_eval
        :param user_configs: you can overwrite or add more configs with user_configs. Eg. "epochs"
        :return: None
        """
        # self._check_configs()
        if self.config is None:
            self.config = self.get_default_configs()
        # **user_configs is always a dict (possibly empty), so it can be merged directly.
        self.config.update(user_configs)

        # Identity configs consumed by the feature transformer constructor.
        transformer_keys = ('future_seq_len', 'dt_col', 'target_col',
                            'extra_features_col', 'drop_missing')
        transformer_kwargs = {key: self.config[key] for key in transformer_keys}
        self.feature_transformers = TimeSequenceFeatureTransformer(
            **transformer_kwargs)

        # The model only needs the forecast horizon as an identity config.
        self.model = TimeSequenceModel(
            check_optional_config=False,
            future_seq_len=self.config['future_seq_len'])

        # Every feature the transformer reports for the input is selected.
        feature_names = self.feature_transformers.get_feature_list(input_df)
        self.config.update({"selected_features": feature_names})
        x_train, y_train = self.feature_transformers.fit_transform(
            input_df, **self.config)

        validation_data = None
        if self._is_val_df_valid(validation_df):
            validation_data = self.feature_transformers.transform(
                validation_df)

        self.model.fit_eval(x_train,
                            y_train,
                            validation_data=validation_data,
                            mc=mc,
                            verbose=1,
                            **self.config)
예제 #5
0
 def test_dataframe_input_with_datetime(self):
     """Run a search on data frames with a datetime column and expect best trials to be returned."""
     train_df, validation_df, future_seq_len = get_ts_input()
     transformer = TimeSequenceFeatureTransformer(
         future_seq_len=future_seq_len,
         dt_col="datetime",
         target_col="value")

     # One more input dimension than generated features
     # (presumably the target column itself - TODO confirm).
     feature_count = len(transformer.get_feature_list())
     recipe = create_lstm_recipe(feature_count + 1)

     searcher = prepare_searcher(
         data=train_df,
         validation_data=validation_df,
         model_creator=LSTM_model_creator,
         name='test_ray_dateframe_with_datetime_with_val',
         recipe=recipe,
         feature_transformer=transformer)
     searcher.run()
     assert searcher.get_best_trials(k=1) is not None
예제 #6
0
    def test_evaluate_predict_future_more_1(self):
        """Check evaluate/predict shapes and consistency for a horizon longer than one step.

        The horizon is drawn with np.random.randint(2, 6). Metrics computed
        from predict() output are compared against pipeline.evaluate().
        """
        target_col = "values"
        metric_names = ["mse", "r2"]
        horizon = np.random.randint(2, 6)
        train_df, test_df, tsp, test_sample_num = self.get_input_tsp(horizon, target_col)
        pipeline = tsp.fit(train_df, test_df)

        # With the default multioutput setting each metric has one entry per step.
        mse, rs = pipeline.evaluate(test_df, metrics=metric_names)
        assert len(mse) == horizon
        assert len(rs) == horizon

        full_prediction = pipeline.predict(test_df)
        expected_rows = test_sample_num - default_past_seq_len + 1
        assert full_prediction.shape == (expected_rows, horizon + 1)

        # Predict on a frame whose last `horizon` rows were cut off, and
        # compare against target values rolled from the remaining rows.
        trimmed_prediction = pipeline.predict(test_df[:-horizon])
        step_columns = ["{}_{}".format(target_col, step) for step in range(horizon)]
        predicted_values = trimmed_prediction[step_columns].values

        truth_df = test_df[default_past_seq_len:]
        truth_values = TimeSequenceFeatureTransformer()._roll_test(truth_df[target_col],
                                                                   horizon)

        mse_pred_eval, rs_pred_eval = [
            Evaluator.evaluate(name, truth_values, predicted_values)
            for name in metric_names
        ]
        mse_eval, rs_eval = pipeline.evaluate(test_df, metric_names)
        assert_array_almost_equal(mse_pred_eval, mse_eval, decimal=2)
        assert_array_almost_equal(rs_pred_eval, rs_eval, decimal=2)
예제 #7
0
def load_ts_pipeline(file):
    """Restore a TimeSequencePipeline from a saved pipeline file.

    :param file: the saved pipeline file, handed to restore_zip
    :return: a TimeSequencePipeline built from the transformer, model and
        config obtained through restore_zip
    """
    transformer = TimeSequenceFeatureTransformer()
    ts_model = TimeSequenceModel(check_optional_config=False)

    # restore_zip receives both fresh objects and returns the stored config.
    restored_config = restore_zip(file, transformer, ts_model)
    pipeline = TimeSequencePipeline(feature_transformers=transformer,
                                    model=ts_model,
                                    config=restored_config)
    print("Restore pipeline from", file)
    return pipeline
 def test_dataframe_input_with_datetime(self):
     """Run a search with train/validation frames passed as one dict and expect best trials."""
     train_df, validation_df, future_seq_len = get_ts_input()
     transformer = TimeSequenceFeatureTransformer(
         future_seq_len=future_seq_len,
         dt_col="datetime",
         target_col="value")

     # Training and validation frames travel together in a single dict.
     search_data = {'df': train_df, 'val_df': validation_df}
     searcher = prepare_searcher(
         data=search_data,
         model_creator=LSTM_model_creator,
         name='test_ray_dateframe_with_datetime_with_val',
         feature_transformer=transformer)
     searcher.run()
     assert searcher.get_best_trials(k=1) is not None
예제 #9
0
class TestSeq2Seq(ZooTestCase):
    """Fit, evaluate, predict and save/restore tests for LSTMSeq2Seq.

    Two models are exercised on random data: one with future_seq_len=1
    and one with future_seq_len=2.
    """

    def setup_method(self, method):
        """Create random data frames, the rolling transformer, a fit config and two fresh models."""
        # super().setup_method(method)
        self.train_data = pd.DataFrame(data=np.random.randn(64, 4))
        self.val_data = pd.DataFrame(data=np.random.randn(16, 4))
        self.test_data = pd.DataFrame(data=np.random.randn(16, 4))

        self.past_seq_len = 6
        self.future_seq_len_1 = 1
        self.future_seq_len_2 = 2

        # use roll method in time_sequence
        self.feat = TimeSequenceFeatureTransformer()

        self.config = {'batch_size': 32, 'epochs': 1}

        self.model_1 = LSTMSeq2Seq(check_optional_config=False,
                                   future_seq_len=self.future_seq_len_1)
        self.model_2 = LSTMSeq2Seq(check_optional_config=False,
                                   future_seq_len=self.future_seq_len_2)

        self.fitted = False
        self.predict_1 = None
        self.predict_2 = None

    def teardown_method(self, method):
        """Nothing to clean up."""
        pass

    def _roll_xy(self, data, future_seq_len):
        """Roll ``data`` into (x, y) samples with the shared past_seq_len."""
        return self.feat._roll_train(data,
                                     past_seq_len=self.past_seq_len,
                                     future_seq_len=future_seq_len)

    def _roll_test_x(self):
        """Roll the test frame into input samples with the shared past_seq_len."""
        return self.feat._roll_test(self.test_data,
                                    past_seq_len=self.past_seq_len)

    def _check_save_restore(self, model, future_seq_len, refit_epochs):
        """Fit ``model``, save it, restore into a new model and compare predictions.

        After the comparison the restored model is trained again for
        ``refit_epochs`` epochs to make sure it is still trainable.
        """
        x_train, y_train = self._roll_xy(self.train_data, future_seq_len)
        x_test = self._roll_test_x()
        model.fit_eval(x_train, y_train, **self.config)

        predict_before = model.predict(x_test)
        new_model = LSTMSeq2Seq(check_optional_config=False)

        dirname = tempfile.mkdtemp(prefix="automl_test_feature")
        try:
            save(dirname, model=model)
            restore(dirname, model=new_model, config=self.config)
            predict_after = new_model.predict(x_test)
            # The message must go through err_msg; a trailing
            # ``, "message"`` after the call only builds a discarded tuple.
            assert_array_almost_equal(
                predict_before, predict_after, decimal=2,
                err_msg="Prediction values are not the same after restore: "
                        "predict before is {}, and predict after is {}".format(
                            predict_before, predict_after))
            new_model.fit_eval(x_train, y_train, epochs=refit_epochs)
        finally:
            shutil.rmtree(dirname)

    def test_fit_eval_1(self):
        """Fitting the one-step model records the data dimensions on the model."""
        x_train_1, y_train_1 = self._roll_xy(self.train_data,
                                             self.future_seq_len_1)
        print("fit_eval_future_seq_len_1:",
              self.model_1.fit_eval(x_train_1, y_train_1, **self.config))
        assert self.model_1.past_seq_len == 6
        assert self.model_1.feature_num == 4
        assert self.model_1.future_seq_len == 1
        assert self.model_1.target_col_num == 1

    def test_fit_eval(self):
        """Fit on 3-D multi-target data and check metric and prediction shapes."""
        past_seq_len = 6
        future_seq_len = 2
        input_dim = 5
        output_dim = 4
        x_train = np.random.rand(100, past_seq_len, input_dim)
        y_train = np.random.rand(100, future_seq_len, output_dim)
        x_test = np.random.rand(100, past_seq_len, input_dim)
        y_test = np.random.rand(100, future_seq_len, output_dim)
        model = LSTMSeq2Seq(check_optional_config=False,
                            future_seq_len=future_seq_len)
        model_config = {
            'batch_size': 32,
            'epochs': 1,
            'latent_dim': 128,
            'dropout': 0.2
        }
        model.fit_eval(x_train, y_train, **model_config)
        y_pred = model.predict(x_test)
        rmse, smape = model.evaluate(x=x_test,
                                     y=y_test,
                                     metric=["rmse", "smape"])
        assert rmse.shape == smape.shape
        assert rmse.shape == (future_seq_len, output_dim)

        assert model.past_seq_len == past_seq_len
        assert model.future_seq_len == future_seq_len
        assert model.feature_num == input_dim
        assert model.target_col_num == output_dim
        assert y_pred.shape == y_test.shape

    def test_fit_eval_2(self):
        """Fitting the two-step model keeps future_seq_len at 2."""
        x_train_2, y_train_2 = self._roll_xy(self.train_data,
                                             self.future_seq_len_2)
        print("fit_eval_future_seq_len_2:",
              self.model_2.fit_eval(x_train_2, y_train_2, **self.config))
        assert self.model_2.future_seq_len == 2

        self.fitted = True

    def test_evaluate_1(self):
        """Evaluate the fitted one-step model on validation samples."""
        x_train_1, y_train_1 = self._roll_xy(self.train_data,
                                             self.future_seq_len_1)
        x_val_1, y_val_1 = self._roll_xy(self.val_data,
                                         self.future_seq_len_1)

        self.model_1.fit_eval(x_train_1, y_train_1, **self.config)

        print("evaluate_future_seq_len_1:",
              self.model_1.evaluate(x_val_1, y_val_1, metric=['mse', 'r2']))

    def test_evaluate_2(self):
        """Evaluate the fitted two-step model on validation samples."""
        x_train_2, y_train_2 = self._roll_xy(self.train_data,
                                             self.future_seq_len_2)
        x_val_2, y_val_2 = self._roll_xy(self.val_data,
                                         self.future_seq_len_2)

        self.model_2.fit_eval(x_train_2, y_train_2, **self.config)

        print("evaluate_future_seq_len_2:",
              self.model_2.evaluate(x_val_2, y_val_2, metric=['mse', 'r2']))

    def test_predict_1(self):
        """One-step predictions have shape (samples, 1)."""
        x_train_1, y_train_1 = self._roll_xy(self.train_data,
                                             self.future_seq_len_1)
        x_test_1 = self._roll_test_x()
        self.model_1.fit_eval(x_train_1, y_train_1, **self.config)

        predict_1 = self.model_1.predict(x_test_1)
        assert predict_1.shape == (x_test_1.shape[0], self.future_seq_len_1)

    def test_predict_2(self):
        """Two-step predictions have shape (samples, 2)."""
        x_train_2, y_train_2 = self._roll_xy(self.train_data,
                                             self.future_seq_len_2)
        x_test_2 = self._roll_test_x()
        self.model_2.fit_eval(x_train_2, y_train_2, **self.config)

        predict_2 = self.model_2.predict(x_test_2)
        assert predict_2.shape == (x_test_2.shape[0], self.future_seq_len_2)

    def test_save_restore_1(self):
        """Save/restore round trip for the one-step model."""
        self._check_save_restore(self.model_1,
                                 self.future_seq_len_1,
                                 refit_epochs=1)

    def test_save_restore_2(self):
        """Save/restore round trip for the two-step model."""
        self._check_save_restore(self.model_2,
                                 self.future_seq_len_2,
                                 refit_epochs=2)

    def test_predict_with_uncertainty(self):
        """Uncertainty predictions keep their shape and are non-zero, also after restore."""
        x_train_2, y_train_2 = self._roll_xy(self.train_data,
                                             self.future_seq_len_2)
        x_test_2 = self._roll_test_x()
        expected_shape = (x_test_2.shape[0], self.future_seq_len_2)

        self.model_2.fit_eval(x_train_2, y_train_2, mc=True, **self.config)
        prediction, uncertainty = self.model_2.predict_with_uncertainty(
            x_test_2, n_iter=2)
        assert prediction.shape == expected_shape
        assert uncertainty.shape == expected_shape
        assert np.any(uncertainty)

        new_model_2 = LSTMSeq2Seq(check_optional_config=False)
        dirname = tempfile.mkdtemp(prefix="automl_test_feature")
        try:
            save(dirname, model=self.model_2)
            restore(dirname, model=new_model_2, config=self.config)
            prediction, uncertainty = new_model_2.predict_with_uncertainty(
                x_test_2, n_iter=2)
            assert prediction.shape == expected_shape
            assert uncertainty.shape == expected_shape
            assert np.any(uncertainty)
        finally:
            shutil.rmtree(dirname)
 def setUp(self):
     """Clear the Keras backend session, create the feature transformer, then call create_data()."""
     tf.keras.backend.clear_session()
     self.ft = TimeSequenceFeatureTransformer()
     self.create_data()
예제 #11
0
class TimeSequencePipeline(Pipeline):
    """Bundle a feature transformer, a model and their configs for time-sequence forecasting."""

    def __init__(self,
                 feature_transformers=None,
                 model=None,
                 config=None,
                 name=None):
        """
        initialize a pipeline
        :param feature_transformers: the feature transformers
        :param model: the internal model
        :param config: dict of configs (or None; fit_with_fixed_configs then uses the defaults)
        :param name: pipeline name, used in the default file names of save() and config_save()
        """
        self.feature_transformers = feature_transformers
        self.model = model
        self.config = config
        self.name = name
        # Creation time stamp; part of the default file names when saving.
        self.time = time.strftime("%Y%m%d-%H%M%S")

    def describe(self):
        """Print the identity configs stored in self.config.

        :raises KeyError: if one of the identity configs is missing from self.config
        """
        init_info = [
            'future_seq_len', 'dt_col', 'target_col', 'extra_features_col',
            'drop_missing'
        ]
        print("**** Initialization info ****")
        for info in init_info:
            print(info + ":", self.config[info])
        print("")

    def fit(self, input_df, validation_df=None, mc=False, epoch_num=20):
        """
        Continue training the existing model on new data.
        :param input_df: training data, passed to the feature transformer with is_train=True
        :param validation_df: optional validation data; ignored when None or empty
               (see _is_val_df_valid)
        :param mc: forwarded unchanged to the model's fit_eval
        :param epoch_num: number of epochs, passed to fit_eval as "epochs"
        :return: None
        """
        x, y = self.feature_transformers.transform(input_df, is_train=True)
        # Use the shared validity check so that a list of data frames is
        # handled the same way as in fit_with_fixed_configs instead of
        # failing on the missing ``.empty`` attribute.
        if self._is_val_df_valid(validation_df):
            validation_data = self.feature_transformers.transform(
                validation_df)
        else:
            validation_data = None
        self.model.fit_eval(x,
                            y,
                            validation_data,
                            mc=mc,
                            verbose=1,
                            epochs=epoch_num)
        print('Fit done!')

    def _is_val_df_valid(self, validation_df):
        """
        Tell whether validation_df carries any validation data.
        :param validation_df: None, one data frame or a list of data frames
        :return: True for a non-empty data frame or a list holding at least one
                 non-empty data frame, otherwise False
        """
        if isinstance(validation_df, pd.DataFrame):
            return not validation_df.empty
        if isinstance(validation_df, list):
            # An empty list yields False, as does a list of only empty frames.
            return any(not df.empty for df in validation_df)
        return False

    def _check_configs(self):
        """
        Validate self.config and report which optional configs are not specified.
        :raises ValueError: if any required config is missing
        """
        required_configs = {'future_seq_len'}
        # Every required key must be present. An intersection test would only
        # fail when *none* of them is given.
        if required_configs.difference(self.config.keys()):
            raise ValueError("Missing required parameters in configuration. " +
                             "Required parameters are: " +
                             str(required_configs))
        default_config = {
            'dt_col': 'datetime',
            'target_col': 'value',
            'extra_features_col': None,
            'drop_missing': True,
            'past_seq_len': 2,
            'batch_size': 64,
            'lr': 0.001,
            'dropout': 0.2,
            'epochs': 10,
            'metric': 'mse'
        }
        for config, value in default_config.items():
            if config not in self.config:
                print('Config: \'{}\' is not specified. '
                      'A default value of {} will be used.'.format(
                          config, value))

    def get_default_configs(self):
        """
        Print and return the default configs.
        :return: a new dict of default config names and values
        """
        default_configs = {
            'dt_col': 'datetime',
            'target_col': 'value',
            'extra_features_col': None,
            'drop_missing': True,
            'future_seq_len': 1,
            'past_seq_len': 2,
            'batch_size': 64,
            'lr': 0.001,
            'dropout': 0.2,
            'epochs': 10,
            'metric': 'mean_squared_error'
        }
        print("**** default config: ****")
        for config, value in default_configs.items():
            print(config + ":", value)
        print(
            "You can change any fields in the default configs by passing into "
            "fit_with_fixed_configs(). Otherwise, the default values will be used."
        )
        return default_configs

    def fit_with_fixed_configs(self,
                               input_df,
                               validation_df=None,
                               mc=False,
                               **user_configs):
        """
        Fit pipeline with fixed configs. The model will be trained from initialization
        with the hyper-parameter specified in configs. The configs contain both identity configs
        (Eg. "future_seq_len", "dt_col", "target_col", "metric") and automl tunable configs
        (Eg. "past_seq_len", "batch_size").
        We recommend calling get_default_configs to see the names and default values of the
        configs you can specify.
        :param input_df: one data frame or a list of data frames
        :param validation_df: one data frame or a list of data frames
        :param mc: forwarded unchanged to the model's fit_eval
        :param user_configs: you can overwrite or add more configs with user_configs. Eg. "epochs"
        :return: None
        """
        # self._check_configs()
        if self.config is None:
            self.config = self.get_default_configs()
        # **user_configs is always a dict (possibly empty), never None.
        self.config.update(user_configs)

        # Identity configs consumed by the feature transformer constructor.
        ft_id_config_set = {
            'future_seq_len', 'dt_col', 'target_col', 'extra_features_col',
            'drop_missing'
        }
        ft_id_configs = {a: self.config[a] for a in ft_id_config_set}
        self.feature_transformers = TimeSequenceFeatureTransformer(
            **ft_id_configs)

        # Identity configs consumed by the model constructor.
        model_id_config_set = {'future_seq_len'}
        model_id_configs = {a: self.config[a] for a in model_id_config_set}
        self.model = TimeSequenceModel(check_optional_config=False,
                                       **model_id_configs)

        # Every feature the transformer reports for the input is selected.
        all_available_features = self.feature_transformers.get_feature_list(
            input_df)
        self.config.update({"selected_features": all_available_features})
        (x_train, y_train) = self.feature_transformers.fit_transform(
            input_df, **self.config)
        if self._is_val_df_valid(validation_df):
            validation_data = self.feature_transformers.transform(
                validation_df)
        else:
            validation_data = None

        self.model.fit_eval(x_train,
                            y_train,
                            validation_data=validation_data,
                            mc=mc,
                            verbose=1,
                            **self.config)

    def evaluate(self, input_df, metrics=["mse"], multioutput='raw_values'):
        """
        evaluate the pipeline
        :param input_df: data to evaluate on
        :param metrics: a metric name or a list of metric names, each passed to
               Evaluator.evaluate
        :param multioutput: string in ['raw_values', 'uniform_average']
                'raw_values' :
                    Returns a full set of errors in case of multioutput input.
                'uniform_average' :
                    Errors of all outputs are averaged with uniform weight.
                Forced to 'uniform_average' when the prediction has a single output column.
        :return: a list with one evaluation result per metric, in the order given
        """
        if isinstance(metrics, str):
            metrics = [metrics]
        # if not isinstance(metrics, list):
        #    raise ValueError("Expected metrics to be a list!")

        x, y = self.feature_transformers.transform(input_df, is_train=True)
        y_pred = self.model.predict(x)
        if len(y_pred.shape) > 1 and y_pred.shape[1] == 1:
            multioutput = 'uniform_average'
        # Metrics are computed on the post-processed targets and predictions.
        y_unscale, y_pred_unscale = self.feature_transformers.post_processing(
            input_df, y_pred, is_train=True)

        return [
            Evaluator.evaluate(m,
                               y_unscale,
                               y_pred_unscale,
                               multioutput=multioutput) for m in metrics
        ]

    def predict(self, input_df):
        """
        predict test data with the pipeline fitted
        :param input_df: data to predict on
        :return: the model prediction after the feature transformer's post-processing
        """
        x, _ = self.feature_transformers.transform(input_df, is_train=False)
        y_pred = self.model.predict(x)
        y_output = self.feature_transformers.post_processing(input_df,
                                                             y_pred,
                                                             is_train=False)
        return y_output

    def predict_with_uncertainty(self, input_df, n_iter=100):
        """
        predict test data together with an uncertainty estimate
        :param input_df: data to predict on
        :param n_iter: forwarded to the model's predict_with_uncertainty
        :return: (post-processed prediction, uncertainty after unscale_uncertainty)
        """
        x, _ = self.feature_transformers.transform(input_df, is_train=False)
        y_pred, y_pred_uncertainty = self.model.predict_with_uncertainty(
            x=x, n_iter=n_iter)
        y_output = self.feature_transformers.post_processing(input_df,
                                                             y_pred,
                                                             is_train=False)
        y_uncertainty = self.feature_transformers.unscale_uncertainty(
            y_pred_uncertainty)
        return y_output, y_uncertainty

    def save(self, ppl_file=None):
        """
        save pipeline to file, contains feature transformer, model, trial config.
        :param ppl_file: target file; defaults to "<name>_<time>.ppl" under DEFAULT_PPL_DIR
        :return: the path the pipeline was saved to
        """
        ppl_file = ppl_file or os.path.join(
            DEFAULT_PPL_DIR, "{}_{}.ppl".format(self.name, self.time))
        save_zip(ppl_file, self.feature_transformers, self.model, self.config)
        print("Pipeline is saved in", ppl_file)
        return ppl_file

    def config_save(self, config_file=None):
        """
        save all configs to file.
        :param config_file: target file; defaults to "<name>_<time>.json" under
               DEFAULT_CONFIG_DIR
        :return: the path the configs were saved to
        """
        config_file = config_file or os.path.join(
            DEFAULT_CONFIG_DIR, "{}_{}.json".format(self.name, self.time))
        save_config(config_file, self.config, replace=True)
        return config_file
예제 #12
0
 def setup_method(self, method):
     """Create the feature transformer, then call create_data()."""
     self.ft = TimeSequenceFeatureTransformer()
     self.create_data()
 def create_feature_transformer(self):
     """Build a TimeSequenceFeatureTransformer from this object's settings.

     The settings are passed positionally in this order: future_seq_len,
     dt_col, target_col, extra_features_col, drop_missing.

     :return: the newly created feature transformer
     """
     ctor_args = (
         self.future_seq_len,
         self.dt_col,
         self.target_col,
         self.extra_features_col,
         self.drop_missing,
     )
     return TimeSequenceFeatureTransformer(*ctor_args)
예제 #14
0
class TestMTNetKeras(ZooTestCase):
    """Fit/evaluate, save/restore and uncertainty tests for MTNetKeras on random data."""

    def setup_method(self, method):
        """Reset Keras, build the rolled data and model, and assemble the fit config."""
        tf.keras.backend.clear_session()
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()
        self.model = MTNetKeras()
        self.config = {
            "long_num": self.long_num,
            "time_step": self.time_step,
            "ar_window": np.random.randint(1, 3),
            "cnn_height": np.random.randint(1, 3),
            "epochs": 1
        }

    def teardown_method(self, method):
        """Nothing to clean up."""
        pass

    def create_data(self):
        """Roll random 4-column series into train, validation and test samples.

        The look-back window is (long_num + 1) * time_step rows and the
        look-forward horizon is one step.
        """
        def gen_train_sample(data, past_seq_len, future_seq_len):
            data = pd.DataFrame(data)
            x, y = self.ft._roll_train(data,
                                       past_seq_len=past_seq_len,
                                       future_seq_len=future_seq_len)
            return x, y

        def gen_test_sample(data, past_seq_len):
            test_data = pd.DataFrame(data)
            x = self.ft._roll_test(test_data, past_seq_len=past_seq_len)
            return x

        self.long_num = 6
        self.time_step = 2
        look_back = (self.long_num + 1) * self.time_step
        look_forward = 1
        self.x_train, self.y_train = gen_train_sample(
            data=np.random.randn(64, 4),
            past_seq_len=look_back,
            future_seq_len=look_forward)
        self.x_val, self.y_val = gen_train_sample(data=np.random.randn(16, 4),
                                                  past_seq_len=look_back,
                                                  future_seq_len=look_forward)
        self.x_test = gen_test_sample(data=np.random.randn(16, 4),
                                      past_seq_len=look_back)

    def test_fit_evaluate(self):
        """Fit with validation data, then evaluate on the validation samples."""
        self.model.fit_eval(data=(self.x_train, self.y_train),
                            validation_data=(self.x_val, self.y_val),
                            **self.config)
        self.model.evaluate(self.x_val, self.y_val)

    def test_save_restore(self):
        """A restored model predicts like the saved one and can be trained further."""
        # Local import so this block does not rely on a file-level import.
        import tempfile

        self.model.fit_eval(data=(self.x_train, self.y_train),
                            validation_data=(self.x_val, self.y_val),
                            **self.config)
        y_pred = self.model.predict(self.x_test)
        assert y_pred.shape == (self.x_test.shape[0], self.y_train.shape[1])
        restored_model = MTNetKeras()
        # A unique temporary directory: it always exists when the cleanup
        # below runs, and no unrelated "tmp" directory in the working
        # directory can be removed by accident.
        dirname = tempfile.mkdtemp(prefix="automl_test_mtnet")
        try:
            save(dirname, model=self.model)
            restore(dirname, model=restored_model, config=self.config)
            predict_after = restored_model.predict(self.x_test)
            # The message must go through err_msg; a trailing
            # ``, "message"`` after the call only builds a discarded tuple.
            assert_array_almost_equal(
                y_pred, predict_after, decimal=2,
                err_msg="Prediction values are not the same after restore: "
                        "predict before is {}, and predict after is {}".format(
                            y_pred, predict_after))
            restored_model.fit_eval((self.x_train, self.y_train), epochs=1)
            restored_model.evaluate(self.x_val, self.y_val)
        finally:
            shutil.rmtree(dirname)

    def test_predict_with_uncertainty(self):
        """Uncertainty predictions match the prediction shape and are not all zero."""
        self.model.fit_eval(data=(self.x_train, self.y_train),
                            validation_data=(self.x_val, self.y_val),
                            mc=True,
                            **self.config)
        pred, uncertainty = self.model.predict_with_uncertainty(self.x_test,
                                                                n_iter=2)
        assert pred.shape == (self.x_test.shape[0], self.y_train.shape[1])
        assert uncertainty.shape == pred.shape
        assert np.any(uncertainty)