def test_fit_lstm_data_creator(self):
        input_feature_dim = 4
        output_feature_dim = 2  # 2 targets are generated in get_tsdataset

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'layer_num': hp.randint(1, 3),
            'lr': hp.choice([0.001, 0.003, 0.01]),
            'dropout': hp.uniform(0.1, 0.2)
        }
        auto_estimator = AutoTSEstimator(model='lstm',
                                         search_space=search_space,
                                         past_seq_len=7,
                                         future_seq_len=1,
                                         input_feature_num=input_feature_dim,
                                         output_target_num=output_feature_dim,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         logs_dir="/tmp/auto_trainer",
                                         cpus_per_trial=2,
                                         name="auto_trainer")
        auto_estimator.fit(data=get_data_creator(),
                           epochs=1,
                           batch_size=hp.choice([32, 64]),
                           validation_data=get_data_creator(),
                           n_sampling=1)
        config = auto_estimator.get_best_config()
        assert config["past_seq_len"] == 7
    def test_fit_third_party_data_creator(self):
        input_feature_dim = 4
        output_feature_dim = 2  # 2 targets are generated in get_tsdataset

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'dropout': hp.uniform(0.1, 0.2)
        }

        auto_estimator = AutoTSEstimator(model=model_creator,
                                         search_space=search_space,
                                         past_seq_len=7,
                                         future_seq_len=1,
                                         input_feature_num=input_feature_dim,
                                         output_target_num=output_feature_dim,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         cpus_per_trial=2)

        auto_estimator.fit(data=get_data_creator(),
                           epochs=1,
                           batch_size=hp.choice([32, 64]),
                           validation_data=get_data_creator(),
                           n_sampling=1)

        config = auto_estimator.get_best_config()
        assert config["past_seq_len"] == 7
    def test_fit_third_party_feature(self):
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        tsdata_train = get_tsdataset().gen_dt_feature().scale(scaler, fit=True)
        tsdata_valid = get_tsdataset().gen_dt_feature().scale(scaler,
                                                              fit=False)

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'dropout': hp.uniform(0.1, 0.2)
        }

        auto_estimator = AutoTSEstimator(model=model_creator,
                                         search_space=search_space,
                                         past_seq_len=hp.randint(4, 6),
                                         future_seq_len=1,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         cpus_per_trial=2)

        ts_pipeline = auto_estimator.fit(data=tsdata_train,
                                         epochs=1,
                                         batch_size=hp.choice([32, 64]),
                                         validation_data=tsdata_valid,
                                         n_sampling=1)
        best_config = auto_estimator.get_best_config()
        best_model = auto_estimator._get_best_automl_model()
        assert 4 <= best_config["past_seq_len"] <= 6

        assert isinstance(ts_pipeline, TSPipeline)

        # use raw base model to predic and evaluate
        tsdata_valid.roll(lookback=best_config["past_seq_len"],
                          horizon=0,
                          feature_col=best_config["selected_features"])
        x_valid, y_valid = tsdata_valid.to_numpy()
        y_pred_raw = best_model.predict(x_valid)
        y_pred_raw = tsdata_valid.unscale_numpy(y_pred_raw)

        # use tspipeline to predic and evaluate
        eval_result = ts_pipeline.evaluate(tsdata_valid)
        y_pred = ts_pipeline.predict(tsdata_valid)

        # check if they are the same
        np.testing.assert_almost_equal(y_pred, y_pred_raw)

        # save and load
        ts_pipeline.save("/tmp/auto_trainer/autots_tmp_model_3rdparty")
        new_ts_pipeline = TSPipeline.load(
            "/tmp/auto_trainer/autots_tmp_model_3rdparty")

        # check if load ppl is the same as previous
        eval_result_new = new_ts_pipeline.evaluate(tsdata_valid)
        y_pred_new = new_ts_pipeline.predict(tsdata_valid)
        np.testing.assert_almost_equal(eval_result[0], eval_result_new[0])
        np.testing.assert_almost_equal(y_pred, y_pred_new)

        # use tspipeline to incrementally train
        new_ts_pipeline.fit(tsdata_valid)
예제 #4
0
    def test_select_feature(self):
        sample_num = np.random.randint(100, 200)
        df = pd.DataFrame({
            "datetime":
            pd.date_range('1/1/2019', periods=sample_num),
            "value":
            np.random.randn(sample_num),
            "id":
            np.array(['00'] * sample_num)
        })
        train_ts, val_ts, _ = TSDataset.from_pandas(df,
                                                    target_col=['value'],
                                                    dt_col='datetime',
                                                    id_col='id',
                                                    with_split=True,
                                                    val_ratio=0.1)

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'layer_num': hp.randint(1, 3),
            'lr': hp.choice([0.001, 0.003, 0.01]),
            'dropout': hp.uniform(0.1, 0.2)
        }

        input_feature_dim, output_feature_dim = 1, 1
        auto_estimator = AutoTSEstimator(model='lstm',
                                         search_space=search_space,
                                         past_seq_len=6,
                                         future_seq_len=1,
                                         input_feature_num=input_feature_dim,
                                         output_target_num=output_feature_dim,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         cpus_per_trial=2,
                                         name="auto_trainer")

        auto_estimator.fit(data=train_ts,
                           epochs=1,
                           batch_size=hp.choice([32, 64]),
                           validation_data=val_ts,
                           n_sampling=1)
        config = auto_estimator.get_best_config()
        assert config['past_seq_len'] == 6
예제 #5
0
    def test_fit_seq2seq_feature(self):
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        tsdata_train = get_tsdataset().gen_dt_feature().scale(scaler, fit=True)
        tsdata_valid = get_tsdataset().gen_dt_feature().scale(scaler,
                                                              fit=False)

        auto_estimator = AutoTSEstimator(model='seq2seq',
                                         search_space="minimal",
                                         past_seq_len=hp.randint(4, 6),
                                         future_seq_len=1,
                                         selected_features="auto",
                                         metric="mse",
                                         optimizer="Adam",
                                         loss=torch.nn.MSELoss(),
                                         logs_dir="/tmp/auto_trainer",
                                         cpus_per_trial=2,
                                         name="auto_trainer")
        ts_pipeline = auto_estimator.fit(data=tsdata_train,
                                         epochs=1,
                                         batch_size=hp.choice([32, 64]),
                                         validation_data=tsdata_valid,
                                         n_sampling=1)
        best_config = auto_estimator.get_best_config()
        best_model = auto_estimator._get_best_automl_model()
        assert 4 <= best_config["past_seq_len"] <= 6

        assert isinstance(ts_pipeline, TSPipeline)

        # use raw base model to predic and evaluate
        tsdata_valid.roll(lookback=best_config["past_seq_len"],
                          horizon=0,
                          feature_col=best_config["selected_features"])
        x_valid, y_valid = tsdata_valid.to_numpy()
        y_pred_raw = best_model.predict(x_valid)
        y_pred_raw = tsdata_valid.unscale_numpy(y_pred_raw)

        # use tspipeline to predic and evaluate
        eval_result = ts_pipeline.evaluate(tsdata_valid)
        y_pred = ts_pipeline.predict(tsdata_valid)

        # check if they are the same
        np.testing.assert_almost_equal(y_pred, y_pred_raw)

        # save and load
        ts_pipeline.save("/tmp/auto_trainer/autots_tmp_model_seq2seq")
        new_ts_pipeline = TSPipeline.load(
            "/tmp/auto_trainer/autots_tmp_model_seq2seq")

        # check if load ppl is the same as previous
        eval_result_new = new_ts_pipeline.evaluate(tsdata_valid)
        y_pred_new = new_ts_pipeline.predict(tsdata_valid)
        np.testing.assert_almost_equal(eval_result[0], eval_result_new[0])
        np.testing.assert_almost_equal(y_pred, y_pred_new)

        # check if load ppl is the same as previous with onnx
        try:
            import onnx
            import onnxruntime
            eval_result_new_onnx = new_ts_pipeline.evaluate_with_onnx(
                tsdata_valid)
            y_pred_new_onnx = new_ts_pipeline.predict_with_onnx(tsdata_valid)
            np.testing.assert_almost_equal(eval_result[0],
                                           eval_result_new_onnx[0],
                                           decimal=5)
            np.testing.assert_almost_equal(y_pred, y_pred_new_onnx, decimal=5)
        except ImportError:
            pass

        # use tspipeline to incrementally train
        new_ts_pipeline.fit(tsdata_valid)