Пример #1
0
    def test_predict(self):
        """Fit a VanillaLSTM on rolled random data and check the prediction shape.

        The prediction must have one row per rolled test sample and one column.
        """
        horizon = 1
        lookback = 6
        train_frame = pd.DataFrame(data=np.random.randn(64, 4))
        test_frame = pd.DataFrame(data=np.random.randn(16, 4))

        # Roll the frames into model inputs with the feature transformer.
        roller = TimeSequenceFeatureTransformer()
        train_x, train_y = roller._roll_train(
            train_frame, past_seq_len=lookback, future_seq_len=horizon)
        test_x = roller._roll_test(test_frame, past_seq_len=lookback)

        lstm = VanillaLSTM(check_optional_config=False,
                           future_seq_len=horizon)
        lstm.fit_eval(train_x,
                      train_y,
                      epochs=2,
                      lr=0.001,
                      lstm_1_units=16,
                      dropout_1=0.2,
                      lstm_2_units=10,
                      dropout_2=0.2,
                      batch_size=32)
        predicted = lstm.predict(test_x)
        expected_shape = (test_x.shape[0], 1)
        assert predicted.shape == expected_shape
Пример #2
0
    def setup_method(self, method):
        """Prepare rolled train/val/test arrays, a fit config and a VanillaLSTM.

        Everything is stored on ``self``; the model is only constructed here,
        not fitted.
        """
        # super().setup_method(method)
        horizon = 1
        lookback = 6

        train_frame = pd.DataFrame(data=np.random.randn(64, 4))
        val_frame = pd.DataFrame(data=np.random.randn(16, 4))
        test_frame = pd.DataFrame(data=np.random.randn(16, 4))

        # Roll the frames into model inputs with the feature transformer.
        roller = TimeSequenceFeatureTransformer()
        roll_kwargs = dict(past_seq_len=lookback, future_seq_len=horizon)
        self.x_train, self.y_train = roller._roll_train(train_frame,
                                                        **roll_kwargs)
        self.x_val, self.y_val = roller._roll_train(val_frame, **roll_kwargs)
        self.x_test = roller._roll_test(test_frame, past_seq_len=lookback)

        self.config = dict(epochs=1,
                           lr=0.001,
                           lstm_1_units=16,
                           dropout_1=0.2,
                           lstm_2_units=10,
                           dropout_2=0.2,
                           batch_size=32)
        self.model = VanillaLSTM(check_optional_config=False,
                                 future_seq_len=horizon)
Пример #3
0
class TestZouwuModelMTNetForecaster(TestCase):
    """Smoke test for MTNetForecaster trained on rolled random data."""

    def setUp(self):
        """Clear the Keras session and regenerate the rolled datasets."""
        tf.keras.backend.clear_session()
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()

    def tearDown(self):
        """No per-test cleanup is performed."""

    def create_data(self):
        """Fill ``self`` with rolled train/val arrays and rolled test inputs.

        The look-back window is ``(long_num + 1) * time_step`` rows and the
        look-forward horizon is 1.
        """
        self.long_num = 6
        self.time_step = 2
        horizon = 1
        window = (self.long_num + 1) * self.time_step

        def rolled_train(n_rows):
            frame = pd.DataFrame(np.random.randn(n_rows, 4))
            features, targets = self.ft._roll_train(frame,
                                                    past_seq_len=window,
                                                    future_seq_len=horizon)
            return features, targets

        def rolled_test(n_rows):
            frame = pd.DataFrame(np.random.randn(n_rows, 4))
            return self.ft._roll_test(frame, past_seq_len=window)

        self.x_train, self.y_train = rolled_train(64)
        self.x_val, self.y_val = rolled_train(16)
        self.x_test = rolled_test(16)

    def test_forecast_mtnet(self):
        """Fit, evaluate and predict with MTNetForecaster; check output shape."""
        # TODO: the original note here reads "hacking to fix a bug"; the bug
        # itself is not described.
        target_dim = 1
        forecaster = MTNetForecaster(target_dim=target_dim,
                                     feature_dim=self.x_train.shape[-1],
                                     long_series_num=self.long_num,
                                     series_length=self.time_step)
        train_long, train_short = forecaster.preprocess_input(self.x_train)
        val_long, val_short = forecaster.preprocess_input(self.x_val)
        test_long, test_short = forecaster.preprocess_input(self.x_test)

        forecaster.fit([train_long, train_short],
                       self.y_train,
                       validation_data=([val_long, val_short], self.y_val),
                       batch_size=32,
                       distributed=False)
        assert forecaster.evaluate([val_long, val_short], self.y_val)
        predictions = forecaster.predict([test_long, test_short])
        expected_shape = (self.x_test.shape[0], target_dim)
        assert predictions.shape == expected_shape
Пример #4
0
class TestZouwuModelLSTMForecaster(TestCase):
    """Smoke test for LSTMForecaster trained on rolled random data."""

    def setUp(self):
        """Clear the Keras session and regenerate the rolled datasets."""
        tf.keras.backend.clear_session()
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()

    def tearDown(self):
        """No per-test cleanup is performed."""

    def create_data(self):
        """Fill ``self`` with rolled train/val arrays and rolled test inputs.

        The look-back window is ``(long_num + 1) * time_step`` rows and the
        look-forward horizon is 1.
        """
        self.long_num = 6
        self.time_step = 2
        horizon = 1
        window = (self.long_num + 1) * self.time_step

        def rolled_train(n_rows):
            frame = pd.DataFrame(np.random.randn(n_rows, 4))
            features, targets = self.ft._roll_train(frame,
                                                    past_seq_len=window,
                                                    future_seq_len=horizon)
            return features, targets

        def rolled_test(n_rows):
            frame = pd.DataFrame(np.random.randn(n_rows, 4))
            return self.ft._roll_test(frame, past_seq_len=window)

        self.x_train, self.y_train = rolled_train(64)
        self.x_val, self.y_val = rolled_train(16)
        self.x_test = rolled_test(16)

    def test_forecast_lstm(self):
        """Run fit, evaluate and predict on LSTMForecaster without asserting outputs."""
        # TODO: the original note here reads "hacking to fix a bug"; the bug
        # itself is not described.
        n_features = self.x_train.shape[-1]
        forecaster = LSTMForecaster(target_dim=1, feature_dim=n_features)
        validation_pair = (self.x_val, self.y_val)
        forecaster.fit(self.x_train,
                       self.y_train,
                       validation_data=validation_pair,
                       batch_size=8,
                       distributed=False)
        forecaster.evaluate(self.x_val, self.y_val)
        forecaster.predict(self.x_test)
Пример #5
0
    def test_save_restore(self):
        """Check that VanillaLSTM predictions are unchanged by save and restore.

        The model is fitted, saved to a temporary directory, restored in place
        with the saved config merged over the fit config, and the predictions
        before and after are compared with ``np.allclose``.
        """
        horizon = 1
        lookback = 6
        train_frame = pd.DataFrame(data=np.random.randn(64, 4))
        test_frame = pd.DataFrame(data=np.random.randn(16, 4))

        # Roll the frames into model inputs with the feature transformer.
        roller = TimeSequenceFeatureTransformer()
        train_x, train_y = roller._roll_train(
            train_frame, past_seq_len=lookback, future_seq_len=horizon)
        test_x = roller._roll_test(test_frame, past_seq_len=lookback)

        config = dict(epochs=2,
                      lr=0.001,
                      lstm_1_units=16,
                      dropout_1=0.2,
                      lstm_2_units=10,
                      dropout_2=0.2,
                      batch_size=32)

        workdir = tempfile.mkdtemp(prefix="automl_test_vanilla")
        try:
            lstm = VanillaLSTM(check_optional_config=False,
                               future_seq_len=horizon)
            lstm.fit_eval(train_x, train_y, **config)
            before = lstm.predict(test_x)

            model_path = os.path.join(workdir, "testmodel.h5")
            config_path = os.path.join(workdir, "local_config.json")
            lstm.save(model_path=model_path, config_path=config_path)

            # Values read back from disk override the original fit config.
            config.update(load_config(config_path))
            lstm.restore(model_path=model_path, **config)
            after = lstm.predict(test_x)
            assert np.allclose(before, after)
        finally:
            # Remove the temporary directory whether or not the test passed.
            shutil.rmtree(workdir)
Пример #6
0
class TestSeq2Seq(ZooTestCase):
    """Tests for LSTMSeq2Seq with ``future_seq_len`` of 1 and 2 on random data."""

    def setup_method(self, method):
        """Create random frames, the feature transformer, config and two models."""
        # super().setup_method(method)
        self.train_data = pd.DataFrame(data=np.random.randn(64, 4))
        self.val_data = pd.DataFrame(data=np.random.randn(16, 4))
        self.test_data = pd.DataFrame(data=np.random.randn(16, 4))

        self.past_seq_len = 6
        self.future_seq_len_1 = 1
        self.future_seq_len_2 = 2

        # use roll method in time_sequence
        self.feat = TimeSequenceFeatureTransformer()

        self.config = {'batch_size': 32, 'epochs': 1}

        self.model_1 = LSTMSeq2Seq(check_optional_config=False,
                                   future_seq_len=self.future_seq_len_1)
        self.model_2 = LSTMSeq2Seq(check_optional_config=False,
                                   future_seq_len=self.future_seq_len_2)

        # Kept from the original fixture; no test in this class reads them.
        self.fitted = False
        self.predict_1 = None
        self.predict_2 = None

    def teardown_method(self, method):
        """No per-test cleanup is performed."""
        pass

    def _roll_train(self, data, future_seq_len):
        """Roll ``data`` into an (x, y) pair for the given horizon."""
        return self.feat._roll_train(data,
                                     past_seq_len=self.past_seq_len,
                                     future_seq_len=future_seq_len)

    def _roll_test(self):
        """Roll ``self.test_data`` into model inputs."""
        return self.feat._roll_test(self.test_data,
                                    past_seq_len=self.past_seq_len)

    def _fit_and_evaluate(self, model, future_seq_len):
        """Fit ``model`` on the train data and return its evaluation on val data."""
        x_train, y_train = self._roll_train(self.train_data, future_seq_len)
        x_val, y_val = self._roll_train(self.val_data, future_seq_len)
        model.fit_eval(x_train, y_train, **self.config)
        return model.evaluate(x_val, y_val, metric=['mse', 'r2'])

    def _check_predict_shape(self, model, future_seq_len):
        """Fit ``model`` and assert predict returns (n_samples, future_seq_len)."""
        x_train, y_train = self._roll_train(self.train_data, future_seq_len)
        x_test = self._roll_test()
        model.fit_eval(x_train, y_train, **self.config)

        prediction = model.predict(x_test)
        assert prediction.shape == (x_test.shape[0], future_seq_len)

    def _check_save_restore(self, model, future_seq_len, refit_epochs):
        """Fit, save and restore into a fresh model; compare predictions; refit."""
        x_train, y_train = self._roll_train(self.train_data, future_seq_len)
        x_test = self._roll_test()
        model.fit_eval(x_train, y_train, **self.config)

        predict_before = model.predict(x_test)
        new_model = LSTMSeq2Seq(check_optional_config=False)

        dirname = tempfile.mkdtemp(prefix="automl_test_feature")
        try:
            save(dirname, model=model)
            restore(dirname, model=new_model, config=self.config)
            predict_after = new_model.predict(x_test)
            # Pass the message through err_msg so it is reported on failure;
            # as a trailing tuple element it would be silently discarded.
            assert_array_almost_equal(
                predict_before,
                predict_after,
                decimal=2,
                err_msg="Prediction values are not the same after restore: "
                        "predict before is {}, and predict after is {}".format(
                            predict_before, predict_after))
            # The restored model must still accept further training.
            new_model.fit_eval(x_train, y_train, epochs=refit_epochs)
        finally:
            shutil.rmtree(dirname)

    def _assert_uncertainty_output(self, model, x_test):
        """Assert prediction and uncertainty shapes and a non-zero uncertainty."""
        prediction, uncertainty = model.predict_with_uncertainty(x_test,
                                                                 n_iter=2)
        expected_shape = (x_test.shape[0], self.future_seq_len_2)
        assert prediction.shape == expected_shape
        assert uncertainty.shape == expected_shape
        assert np.any(uncertainty)

    def test_fit_eval_1(self):
        """Fit model_1 and check the dimensions it records on itself."""
        x_train_1, y_train_1 = self._roll_train(self.train_data,
                                                self.future_seq_len_1)
        print("fit_eval_future_seq_len_1:",
              self.model_1.fit_eval(x_train_1, y_train_1, **self.config))
        assert self.model_1.past_seq_len == 6
        assert self.model_1.feature_num == 4
        assert self.model_1.future_seq_len == 1
        assert self.model_1.target_col_num == 1

    def test_fit_eval_2(self):
        """Fit model_2 and check its recorded future_seq_len."""
        x_train_2, y_train_2 = self._roll_train(self.train_data,
                                                self.future_seq_len_2)
        print("fit_eval_future_seq_len_2:",
              self.model_2.fit_eval(x_train_2, y_train_2, **self.config))
        assert self.model_2.future_seq_len == 2

        self.fitted = True

    def test_evaluate_1(self):
        """Fit model_1 and print its mse/r2 evaluation on the validation data."""
        print("evaluate_future_seq_len_1:",
              self._fit_and_evaluate(self.model_1, self.future_seq_len_1))

    def test_evaluate_2(self):
        """Fit model_2 and print its mse/r2 evaluation on the validation data."""
        print("evaluate_future_seq_len_2:",
              self._fit_and_evaluate(self.model_2, self.future_seq_len_2))

    def test_predict_1(self):
        """Check model_1's prediction shape."""
        self._check_predict_shape(self.model_1, self.future_seq_len_1)

    def test_predict_2(self):
        """Check model_2's prediction shape."""
        self._check_predict_shape(self.model_2, self.future_seq_len_2)

    def test_save_restore_1(self):
        """Save/restore round trip for model_1, then refit for 1 epoch."""
        self._check_save_restore(self.model_1,
                                 self.future_seq_len_1,
                                 refit_epochs=1)

    def test_save_restore_2(self):
        """Save/restore round trip for model_2, then refit for 2 epochs."""
        self._check_save_restore(self.model_2,
                                 self.future_seq_len_2,
                                 refit_epochs=2)

    def test_predict_with_uncertainty(self, ):
        """Check predict_with_uncertainty before and after a save/restore."""
        x_train_2, y_train_2 = self._roll_train(self.train_data,
                                                self.future_seq_len_2)
        x_test_2 = self._roll_test()
        self.model_2.fit_eval(x_train_2, y_train_2, mc=True, **self.config)
        self._assert_uncertainty_output(self.model_2, x_test_2)

        new_model_2 = LSTMSeq2Seq(check_optional_config=False)
        dirname = tempfile.mkdtemp(prefix="automl_test_feature")
        try:
            save(dirname, model=self.model_2)
            restore(dirname, model=new_model_2, config=self.config)
            self._assert_uncertainty_output(new_model_2, x_test_2)
        finally:
            shutil.rmtree(dirname)
Пример #7
0
class TestZouwuModelForecast(ZooTestCase):
    """Tests for the LSTM, MTNet and TCMF forecasters on random data."""

    def setup_method(self, method):
        """Clear the Keras session and regenerate the rolled datasets."""
        tf.keras.backend.clear_session()
        # super(TestZouwuModelForecast, self).setup_method(method)
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()

    def teardown_method(self, method):
        """No per-test cleanup is performed."""
        pass

    def _assert_raises_message(self, message, func, *args, **kwargs):
        """Assert ``func(*args, **kwargs)`` raises with ``message`` in its text."""
        with self.assertRaises(Exception) as context:
            func(*args, **kwargs)
        self.assertIn(message, str(context.exception))

    def create_data(self):
        """Fill ``self`` with rolled train/val arrays and rolled test inputs.

        The look-back window is ``(long_num + 1) * time_step`` rows and the
        look-forward horizon is 1.
        """
        def gen_train_sample(data, past_seq_len, future_seq_len):
            data = pd.DataFrame(data)
            x, y = self.ft._roll_train(data,
                                       past_seq_len=past_seq_len,
                                       future_seq_len=future_seq_len)
            return x, y

        def gen_test_sample(data, past_seq_len):
            test_data = pd.DataFrame(data)
            x = self.ft._roll_test(test_data, past_seq_len=past_seq_len)
            return x

        self.long_num = 6
        self.time_step = 2
        look_back = (self.long_num + 1) * self.time_step
        look_forward = 1
        self.x_train, self.y_train = gen_train_sample(
            data=np.random.randn(64, 4),
            past_seq_len=look_back,
            future_seq_len=look_forward)
        self.x_val, self.y_val = gen_train_sample(data=np.random.randn(16, 4),
                                                  past_seq_len=look_back,
                                                  future_seq_len=look_forward)
        self.x_test = gen_test_sample(data=np.random.randn(16, 4),
                                      past_seq_len=look_back)

    def test_forecast_lstm(self):
        """Run fit, evaluate and predict on LSTMForecaster without asserting outputs."""
        # TODO hacking to fix a bug
        model = LSTMForecaster(target_dim=1,
                               feature_dim=self.x_train.shape[-1])
        model.fit(self.x_train,
                  self.y_train,
                  validation_data=(self.x_val, self.y_val),
                  batch_size=8,
                  distributed=False)
        model.evaluate(self.x_val, self.y_val)
        model.predict(self.x_test)

    def test_forecast_mtnet(self):
        """Run fit, evaluate and predict on MTNetForecaster without asserting outputs."""
        # TODO hacking to fix a bug
        model = MTNetForecaster(target_dim=1,
                                feature_dim=self.x_train.shape[-1],
                                long_series_num=self.long_num,
                                series_length=self.time_step)
        x_train_long, x_train_short = model.preprocess_input(self.x_train)
        x_val_long, x_val_short = model.preprocess_input(self.x_val)
        x_test_long, x_test_short = model.preprocess_input(self.x_test)

        model.fit([x_train_long, x_train_short],
                  self.y_train,
                  validation_data=([x_val_long, x_val_short], self.y_val),
                  batch_size=32,
                  distributed=False)
        model.evaluate([x_val_long, x_val_short], self.y_val)
        model.predict([x_test_long, x_test_short])

    def test_forecast_tcmf(self):
        """Exercise TCMFForecaster on an in-memory dict that includes ids.

        Covers the expected fit errors, a save/load round trip, agreement
        between the original and loaded model's predictions, and evaluate.
        """
        from zoo.zouwu.model.forecast import TCMFForecaster
        import tempfile
        model = TCMFForecaster(max_y_iterations=1,
                               init_FX_epoch=1,
                               max_FX_epoch=1,
                               max_TCN_epoch=1,
                               alt_iters=2)
        horizon = np.random.randint(1, 50)
        # construct data
        ids = np.arange(300)
        data = np.random.rand(300, 480)

        # Input without the `y` key is rejected.
        self._assert_raises_message("key `y` doesn't exist in x",
                                    model.fit, {'data': data})
        fit_input = {'id': ids, 'y': data}
        self._assert_raises_message(
            'You should run fit before calling is_distributed()',
            model.is_distributed)
        model.fit(fit_input)
        assert not model.is_distributed()
        self._assert_raises_message(
            'This model has already been fully trained',
            model.fit, fit_input)
        with self.assertRaises(Exception) as context:
            model.fit(fit_input, incremental=True)
        self.assertIn('NotImplementedError',
                      context.exception.__class__.__name__)

        with tempfile.TemporaryDirectory() as tempdirname:
            model.save(tempdirname)
            loaded_model = TCMFForecaster.load(tempdirname, distributed=False)
        yhat = model.predict(x=None, horizon=horizon)
        yhat_loaded = loaded_model.predict(x=None, horizon=horizon)
        yhat_id = yhat_loaded["id"]
        assert (yhat_id == ids).all()
        yhat = yhat["prediction"]
        yhat_loaded = yhat_loaded["prediction"]
        assert yhat.shape == (300, horizon)
        assert (yhat == yhat_loaded).all()
        target_value = np.random.rand(300, horizon)
        target_value = {"y": target_value}
        model.evaluate(x=None, target_value=target_value, metric=['mse'])

    def test_forecast_tcmf_without_id(self):
        """Exercise TCMFForecaster on an in-memory dict without ids.

        Covers input validation errors, a save/load round trip, the absence
        of "id" in the prediction, and evaluate's target validation.
        """
        from zoo.zouwu.model.forecast import TCMFForecaster
        import tempfile
        model = TCMFForecaster(max_y_iterations=1,
                               init_FX_epoch=1,
                               max_FX_epoch=1,
                               max_TCN_epoch=1,
                               alt_iters=2)
        horizon = np.random.randint(1, 50)
        # construct data
        mismatched_ids = np.arange(200)  # 200 ids against 300 rows of data
        data = np.random.rand(300, 480)

        self._assert_raises_message("the value of y should be an ndarray",
                                    model.fit, {'y': "abc"})
        self._assert_raises_message(
            "the length of the id array should be equal to the number of",
            model.fit, {'id': mismatched_ids, 'y': data})
        fit_input = {'y': data}
        model.fit(fit_input)
        assert not model.is_distributed()
        self._assert_raises_message(
            'This model has already been fully trained',
            model.fit, fit_input)

        with tempfile.TemporaryDirectory() as tempdirname:
            model.save(tempdirname)
            loaded_model = TCMFForecaster.load(tempdirname, distributed=False)
        yhat = model.predict(x=None, horizon=horizon)
        yhat_loaded = loaded_model.predict(x=None, horizon=horizon)
        assert "id" not in yhat_loaded
        yhat = yhat["prediction"]
        yhat_loaded = yhat_loaded["prediction"]
        assert yhat.shape == (300, horizon)
        assert (yhat == yhat_loaded).all()
        target_value = np.random.rand(300, horizon)
        target_value_fake = {"data": target_value}
        self._assert_raises_message("key y doesn't exist in y",
                                    model.evaluate,
                                    x=None,
                                    target_value=target_value_fake,
                                    metric=['mse'])
        target_value = {"y": target_value}
        model.evaluate(x=None, target_value=target_value, metric=['mse'])

    def test_forecast_tcmf_xshards(self):
        """Exercise TCMFForecaster on shards read with the pandas backend.

        ``OrcaContext.pandas_read_backend`` is switched to "pandas" for the
        test and set back to "spark" in a ``finally`` block, so a failure
        here does not leave the "pandas" backend active for later tests.
        """
        from zoo.zouwu.model.forecast import TCMFForecaster
        from zoo.orca import OrcaContext
        import zoo.orca.data.pandas
        import tempfile

        def preprocessing(df, id_name, y_name):
            # Use the frame index as ids and the values as the series matrix.
            return {id_name: df.index, y_name: df.to_numpy()}

        def postprocessing(pred_results, output_dt_col_name):
            id_arr = pred_results["id"]
            pred_results = pred_results["prediction"]
            pred_results = np.concatenate(
                (np.expand_dims(id_arr, axis=1), pred_results), axis=1)
            final_df = pd.DataFrame(pred_results,
                                    columns=["id"] + output_dt_col_name)
            final_df.id = final_df.id.astype("int")
            final_df = final_df.set_index("id")
            final_df.columns.name = "datetime"
            final_df = final_df.unstack().reset_index().rename(
                {0: "prediction"}, axis=1)
            return final_df

        def get_pred(d):
            return d["prediction"]

        OrcaContext.pandas_read_backend = "pandas"
        try:
            model = TCMFForecaster(max_y_iterations=1,
                                   init_FX_epoch=1,
                                   max_FX_epoch=1,
                                   max_TCN_epoch=1,
                                   alt_iters=2)

            with tempfile.NamedTemporaryFile() as temp:
                data = np.random.rand(300, 480)
                df = pd.DataFrame(data)
                df.to_csv(temp.name)
                shard = zoo.orca.data.pandas.read_csv(temp.name)
            shard.cache()

            shard_train = shard.transform_shard(preprocessing, 'id', 'data')
            self._assert_raises_message("key `y` doesn't exist in x",
                                        model.fit, shard_train)
            shard_train = shard.transform_shard(preprocessing, 'cid', 'y')
            self._assert_raises_message("key `id` doesn't exist in x",
                                        model.fit, shard_train)
            self._assert_raises_message(
                'You should run fit before calling is_distributed()',
                model.is_distributed)
            shard_train = shard.transform_shard(preprocessing, 'id', 'y')
            model.fit(shard_train)
            assert model.is_distributed()
            self._assert_raises_message(
                'This model has already been fully trained',
                model.fit, shard_train)
            with self.assertRaises(Exception) as context:
                model.fit(shard_train, incremental=True)
            self.assertIn('NotImplementedError',
                          context.exception.__class__.__name__)

            with tempfile.TemporaryDirectory() as tempdirname:
                model.save(tempdirname + "/model")
                loaded_model = TCMFForecaster.load(tempdirname + "/model",
                                                   distributed=True)
            horizon = np.random.randint(1, 50)
            yhat_shard_origin = model.predict(x=None, horizon=horizon)
            yhat_list_origin = yhat_shard_origin.collect()
            yhat_list_origin = list(map(get_pred, yhat_list_origin))
            yhat_shard = loaded_model.predict(x=None, horizon=horizon)
            yhat_list = yhat_shard.collect()
            yhat_list = list(map(get_pred, yhat_list))
            yhat_origin = np.concatenate(yhat_list_origin)
            yhat = np.concatenate(yhat_list)
            assert yhat.shape == (300, horizon)
            assert (yhat == yhat_origin).all()

            output_dt_col_name = pd.date_range(start='2020-05-01',
                                               periods=horizon,
                                               freq='H').to_list()
            yhat_df_shards = yhat_shard.transform_shard(postprocessing,
                                                        output_dt_col_name)
            final_df_list = yhat_df_shards.collect()
            final_df = pd.concat(final_df_list)
            final_df.sort_values("datetime", inplace=True)
            assert final_df.shape == (300 * horizon, 3)
        finally:
            # Always switch the global backend back, even when the test fails.
            OrcaContext.pandas_read_backend = "spark"
Пример #8
0
class TestZouwuModelForecast(ZooTestCase):
    """Smoke tests for LSTMForecaster and MTNetForecaster on rolled random data."""

    def setup_method(self, method):
        """Clear the Keras session and regenerate the rolled datasets."""
        tf.keras.backend.clear_session()
        # super(TestZouwuModelForecast, self).setup_method(method)
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()

    def teardown_method(self, method):
        """No per-test cleanup is performed."""

    def create_data(self):
        """Fill ``self`` with rolled train/val arrays and rolled test inputs.

        The look-back window is ``(long_num + 1) * time_step`` rows and the
        look-forward horizon is 1.
        """
        self.long_num = 6
        self.time_step = 2
        steps_ahead = 1
        window = (self.long_num + 1) * self.time_step

        def rolled_train(n_rows):
            frame = pd.DataFrame(np.random.randn(n_rows, 4))
            features, targets = self.ft._roll_train(
                frame, past_seq_len=window, future_seq_len=steps_ahead)
            return features, targets

        def rolled_test(n_rows):
            frame = pd.DataFrame(np.random.randn(n_rows, 4))
            return self.ft._roll_test(frame, past_seq_len=window)

        self.x_train, self.y_train = rolled_train(64)
        self.x_val, self.y_val = rolled_train(16)
        self.x_test = rolled_test(16)

    def test_forecast_lstm(self):
        """Run fit, evaluate and predict on LSTMForecaster without asserting outputs."""
        # TODO: the original note here reads "hacking to fix a bug"; the bug
        # itself is not described.
        n_features = self.x_train.shape[-1]
        forecaster = LSTMForecaster(horizon=1, feature_dim=n_features)
        validation_pair = (self.x_val, self.y_val)
        forecaster.fit(self.x_train,
                       self.y_train,
                       validation_data=validation_pair,
                       batch_size=8,
                       distributed=False)
        forecaster.evaluate(self.x_val, self.y_val)
        forecaster.predict(self.x_test)

    def test_forecast_mtnet(self):
        """Run fit, evaluate and predict on MTNetForecaster without asserting outputs."""
        # TODO: the original note here reads "hacking to fix a bug"; the bug
        # itself is not described.
        forecaster = MTNetForecaster(horizon=1,
                                     feature_dim=self.x_train.shape[-1],
                                     lb_long_steps=self.long_num,
                                     lb_long_stepsize=self.time_step)
        train_long, train_short = forecaster.preprocess_input(self.x_train)
        val_long, val_short = forecaster.preprocess_input(self.x_val)
        test_long, test_short = forecaster.preprocess_input(self.x_test)

        forecaster.fit([train_long, train_short],
                       self.y_train,
                       validation_data=([val_long, val_short], self.y_val),
                       batch_size=32,
                       distributed=False)
        forecaster.evaluate([val_long, val_short], self.y_val)
        forecaster.predict([test_long, test_short])
class TestZouwuModelForecast(ZooTestCase):
    """Smoke tests for the Zouwu LSTM, MTNet and TCMF forecasters.

    Every test trains on random data and only checks that the
    fit / evaluate / predict calls complete without raising.
    """

    def setup_method(self, method):
        """Clear the Keras session and rebuild the random fixtures."""
        tf.keras.backend.clear_session()
        # NOTE(review): the base-class setup_method is not invoked here --
        # confirm that this is intended.
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()

    def teardown_method(self, method):
        """No-op hook: nothing is cleaned up after a test."""
        return None

    def create_data(self):
        """Populate ``self`` with rolled train / validation / test samples.

        Sets ``long_num`` to 6 and ``time_step`` to 2, then rolls random
        normal data using ``past_seq_len=(long_num + 1) * time_step`` and
        ``future_seq_len=1``.  The training frame has 64 rows, the validation
        and test frames 16 rows each, all with 4 columns.
        """
        self.long_num = 6
        self.time_step = 2
        window = (self.long_num + 1) * self.time_step
        steps_ahead = 1

        def rolled_pair(raw):
            # Wrap the raw array in a DataFrame and roll it into (x, y).
            frame = pd.DataFrame(raw)
            return self.ft._roll_train(frame,
                                       past_seq_len=window,
                                       future_seq_len=steps_ahead)

        # The draw order (train, val, test) matches the original fixture.
        self.x_train, self.y_train = rolled_pair(np.random.randn(64, 4))
        self.x_val, self.y_val = rolled_pair(np.random.randn(16, 4))
        test_frame = pd.DataFrame(np.random.randn(16, 4))
        self.x_test = self.ft._roll_test(test_frame, past_seq_len=window)

    def test_forecast_lstm(self):
        """Fit, evaluate and predict with an LSTMForecaster."""
        # TODO: carried over from the original note ("hacking to fix a bug");
        # the bug it refers to is not identified in this test.
        n_features = self.x_train.shape[-1]
        forecaster = LSTMForecaster(target_dim=1, feature_dim=n_features)
        fit_options = {
            "validation_data": (self.x_val, self.y_val),
            "batch_size": 8,
            "distributed": False,
        }
        forecaster.fit(self.x_train, self.y_train, **fit_options)
        forecaster.evaluate(self.x_val, self.y_val)
        forecaster.predict(self.x_test)

    def test_forecast_mtnet(self):
        """Fit, evaluate and predict with an MTNetForecaster.

        Inputs go through ``preprocess_input`` first, which yields a
        (long, short) pair for each split.
        """
        # TODO: carried over from the original note ("hacking to fix a bug");
        # the bug it refers to is not identified in this test.
        forecaster = MTNetForecaster(
            target_dim=1,
            feature_dim=self.x_train.shape[-1],
            lb_long_steps=self.long_num,
            lb_long_stepsize=self.time_step,
        )

        # All three splits are preprocessed before any training starts.
        train_long, train_short = forecaster.preprocess_input(self.x_train)
        val_long, val_short = forecaster.preprocess_input(self.x_val)
        test_long, test_short = forecaster.preprocess_input(self.x_test)

        forecaster.fit(
            [train_long, train_short],
            self.y_train,
            validation_data=([val_long, val_short], self.y_val),
            batch_size=32,
            distributed=False,
        )
        forecaster.evaluate([val_long, val_short], self.y_val)
        forecaster.predict([test_long, test_short])

    def test_forecast_tcmf(self):
        """Fit a TCMFForecaster on random data, then predict and evaluate.

        The model is configured with single-epoch / single-iteration settings
        and ``alt_iters=2``.  Prediction and evaluation are both requested
        with ``x=None``; prediction uses a horizon of 24 and evaluation
        scores a random (300, 24) target with the ``'mse'`` metric.
        """
        from zoo.zouwu.model.forecast import TCMFForecaster

        tcmf_settings = {
            "max_y_iterations": 1,
            "init_XF_epoch": 1,
            "max_FX_epoch": 1,
            "max_TCN_epoch": 1,
            "alt_iters": 2,
        }
        forecaster = TCMFForecaster(**tcmf_settings)
        series = np.random.rand(300, 480)
        forecaster.fit(series)
        forecaster.predict(x=None, horizon=24)
        # The target is drawn only after predict, as in the original test.
        target_value = np.random.rand(300, 24)
        forecaster.evaluate(x=None, target_value=target_value, metric=['mse'])
Пример #10
0
class TestMTNetKeras(ZooTestCase):

    def setup_method(self, method):
        """Clear the Keras session and build the data, model and config.

        ``ar_window`` and ``cnn_height`` are each drawn with
        ``np.random.randint(1, 3)``; the model is trained for a single epoch.
        """
        tf.keras.backend.clear_session()
        self.ft = TimeSequenceFeatureTransformer()
        self.create_data()
        self.model = MTNetKeras()

        # Draw the two random hyper-parameters in the original order.
        ar_window = np.random.randint(1, 3)
        cnn_height = np.random.randint(1, 3)
        self.config = {
            "long_num": self.long_num,
            "time_step": self.time_step,
            "ar_window": ar_window,
            "cnn_height": cnn_height,
            "epochs": 1,
        }

    def teardown_method(self, method):
        """No-op hook: nothing is cleaned up after a test."""
        return None

    def create_data(self):
        """Populate ``self`` with rolled train / validation / test samples.

        Sets ``long_num`` to 6 and ``time_step`` to 2, then rolls random
        normal data using ``past_seq_len=(long_num + 1) * time_step`` and
        ``future_seq_len=1``.  The training frame has 64 rows, the validation
        and test frames 16 rows each, all with 4 columns.
        """
        self.long_num, self.time_step = 6, 2
        window = (self.long_num + 1) * self.time_step
        steps_ahead = 1

        def rolled_pair(n_rows):
            # Draw the random frame, then roll it into an (x, y) pair.
            frame = pd.DataFrame(np.random.randn(n_rows, 4))
            return self.ft._roll_train(frame,
                                       past_seq_len=window,
                                       future_seq_len=steps_ahead)

        # The draw order (train, val, test) matches the original fixture.
        self.x_train, self.y_train = rolled_pair(64)
        self.x_val, self.y_val = rolled_pair(16)
        test_frame = pd.DataFrame(np.random.randn(16, 4))
        self.x_test = self.ft._roll_test(test_frame, past_seq_len=window)

    def test_fit_evaluate(self):
        """Fit the model with validation data, then evaluate it.

        Passes as long as neither call raises; no metric value is checked.
        """
        val_pair = (self.x_val, self.y_val)
        self.model.fit_eval(
            self.x_train,
            self.y_train,
            validation_data=val_pair,
            **self.config
        )
        self.model.evaluate(*val_pair)

    def test_save_restore(self):
        """Check that a saved-then-restored model reproduces its predictions.

        Fits ``self.model``, records its predictions on ``self.x_test``, calls
        ``save`` with a scratch directory name, calls ``restore`` into a fresh
        ``MTNetKeras`` and asserts that the restored predictions match the
        original ones to two decimals.  The restored model is then fitted for
        one more epoch and evaluated to confirm it is still usable.

        The scratch directory is removed afterwards on a best-effort basis.
        """
        self.model.fit_eval(self.x_train, self.y_train,
                            validation_data=(self.x_val, self.y_val),
                            **self.config)
        y_pred = self.model.predict(self.x_test)
        assert y_pred.shape == (self.x_test.shape[0], self.y_train.shape[1])
        dirname = "tmp"
        restored_model = MTNetKeras()
        try:
            save(dirname, model=self.model)
            restore(dirname, model=restored_model, config=self.config)
            predict_after = restored_model.predict(self.x_test)
            # The message must go through ``err_msg``: writing it after a
            # comma only builds a tuple that is thrown away, so it would never
            # appear in the failure output.  (Assumes this is
            # numpy.testing.assert_array_almost_equal -- confirm the import.)
            assert_array_almost_equal(
                y_pred, predict_after, decimal=2,
                err_msg="Prediction values are not the same after restore: "
                        "predict before is {}, and predict after is {}".format(
                            y_pred, predict_after))
            restored_model.fit_eval(self.x_train, self.y_train, epochs=1)
            restored_model.evaluate(self.x_val, self.y_val)
        finally:
            # Reuse ``dirname`` and tolerate a missing directory so that a
            # failure before the directory exists is not replaced by a
            # FileNotFoundError raised from the cleanup itself.
            shutil.rmtree(dirname, ignore_errors=True)

    def test_predict_with_uncertainty(self):
        """Check shapes and non-zero-ness of predictions with uncertainty.

        The model is fitted with ``mc=True`` and then asked for predictions
        plus uncertainty using ``n_iter=2``.  The prediction must have one row
        per test sample and as many columns as ``y_train``; the uncertainty
        must have the same shape and contain at least one truthy entry.
        """
        self.model.fit_eval(
            self.x_train,
            self.y_train,
            validation_data=(self.x_val, self.y_val),
            mc=True,
            **self.config
        )
        pred, uncertainty = self.model.predict_with_uncertainty(
            self.x_test, n_iter=2)
        expected_shape = (self.x_test.shape[0], self.y_train.shape[1])
        assert pred.shape == expected_shape
        assert uncertainty.shape == pred.shape
        assert np.any(uncertainty)