def search_space(self):
    """Return the hyper-parameter search space for this model."""
    from zoo.orca.automl import hp

    # Assemble the space key by key; each entry is a categorical choice.
    space = {}
    space["hidden_size"] = hp.choice([5, 10])
    space["lr"] = hp.choice([0.001, 0.003, 0.01])
    space["batch_size"] = hp.choice([32, 64])
    return space
Example #2
0
    def test_fit_data_creator(self):
        """Smoke-test AutoLSTM.fit with dataloader-creator input.

        Fits one epoch over a small search space, then checks that a best
        model exists and the best config falls inside the declared ranges.
        """
        auto_lstm = AutoLSTM(input_feature_num=input_feature_dim,
                             output_target_num=output_feature_dim,
                             past_seq_len=5,
                             optimizer='Adam',
                             loss=torch.nn.MSELoss(),
                             metric="mse",
                             hidden_dim=hp.grid_search([32, 64]),
                             layer_num=hp.randint(1, 3),
                             lr=hp.choice([0.001, 0.003, 0.01]),
                             dropout=hp.uniform(0.1, 0.2),
                             logs_dir="/tmp/auto_lstm",
                             cpus_per_trial=2,
                             name="auto_lstm")

        # batch_size is itself a searched dimension of the fit call
        auto_lstm.fit(data=train_dataloader_creator,
                      epochs=1,
                      batch_size=hp.choice([32, 64]),
                      validation_data=valid_dataloader_creator,
                      n_sampling=1,
                      )
        assert auto_lstm.get_best_model()
        best_config = auto_lstm.get_best_config()
        # each assertion mirrors the corresponding hp.* range above
        assert 0.1 <= best_config['dropout'] <= 0.2
        assert best_config['batch_size'] in (32, 64)
        assert 1 <= best_config['layer_num'] < 3
Example #3
0
    def __init__(self,
                 num_rand_samples=1,
                 training_iteration=40,
                 batch_size=[256, 512],
                 hidden_size=[32, 48],
                 levels=[6, 8],
                 kernel_size=[3, 5],
                 dropout=[0, 0.1],
                 lr=[0.001, 0.003]):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param batch_size: grid search candidates for batch size
        :param hidden_size: grid search candidates for hidden size of each layer
        :param levels: grid search candidates for the number of layers
        :param kernel_size: grid search candidates for the kernel size of each layer
        :param dropout: candidates for dropout rate (1 - keep probability)
        :param lr: candidates for learning rate
        """
        # NOTE: the list defaults are only read, never mutated, so they are
        # safe despite being mutable defaults.
        # super(self.__class__, self) recurses infinitely if a subclass
        # inherits this __init__; the zero-argument Python 3 form is correct.
        super().__init__()
        # -- run time params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- optimization params
        self.lr = hp.choice(lr)
        self.batch_size = hp.grid_search(batch_size)

        # ---- model params
        self.hidden_size = hp.grid_search(hidden_size)
        self.levels = hp.grid_search(levels)
        self.kernel_size = hp.grid_search(kernel_size)
        self.dropout = hp.choice(dropout)
Example #4
0
 def search_space(self):
     """Return a fixed-plus-searchable config for the MTNet model."""
     # past_seq_len is derived from the sampled long_num and time_step
     return {
         "model": "MTNet",
         "lr": 0.001,
         "batch_size": 16,
         "epochs": 1,
         "cnn_dropout": 0.2,
         "rnn_dropout": 0.2,
         "time_step": hp.choice([3, 4]),
         "cnn_height": 2,
         "long_num": hp.choice([3, 4]),
         "ar_size": hp.choice([2, 3]),
         "past_seq_len": hp.sample_from(
             lambda spec: (spec.config.long_num + 1) * spec.config.time_step),
     }
Example #5
0
 def test_num_channels(self):
     """Check that an explicitly fixed ``num_channels`` survives tuning.

     ``num_channels`` is not a search dimension here, so the best config
     must echo it back unchanged.
     """
     auto_tcn = AutoTCN(input_feature_num=input_feature_dim,
                        output_target_num=output_feature_dim,
                        past_seq_len=past_seq_len,
                        future_seq_len=future_seq_len,
                        optimizer='Adam',
                        loss=torch.nn.MSELoss(),
                        metric="mse",
                        hidden_units=4,
                        levels=hp.randint(1, 3),
                        num_channels=[8] * 2,
                        kernel_size=hp.choice([2, 3]),
                        lr=hp.choice([0.001, 0.003, 0.01]),
                        dropout=hp.uniform(0.1, 0.2),
                        logs_dir="/tmp/auto_tcn",
                        cpus_per_trial=2,
                        name="auto_tcn")
     auto_tcn.fit(data=train_dataloader_creator,
                  epochs=1,
                  batch_size=hp.choice([32, 64]),
                  validation_data=valid_dataloader_creator,
                  n_sampling=1,
                  )
     assert auto_tcn.get_best_model()
     best_config = auto_tcn.get_best_config()
     # the fixed value should pass through tuning untouched
     assert best_config['num_channels'] == [8]*2
    def test_fit_lstm_data_creator(self):
        """End-to-end AutoTSTrainer (LSTM) fit from a data creator."""
        input_feature_dim = 4
        output_feature_dim = 2  # 2 targets are generated in get_tsdataset

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'layer_num': hp.randint(1, 3),
            'lr': hp.choice([0.001, 0.003, 0.01]),
            'dropout': hp.uniform(0.1, 0.2)
        }
        auto_trainer = AutoTSTrainer(model='lstm',
                                     search_space=search_space,
                                     past_seq_len=7,
                                     future_seq_len=1,
                                     input_feature_num=input_feature_dim,
                                     output_target_num=output_feature_dim,
                                     selected_features="auto",
                                     metric="mse",
                                     loss=torch.nn.MSELoss(),
                                     logs_dir="/tmp/auto_trainer",
                                     cpus_per_trial=2,
                                     name="auto_trainer")
        auto_trainer.fit(data=get_data_creator(),
                         epochs=1,
                         batch_size=hp.choice([32, 64]),
                         validation_data=get_data_creator(),
                         n_sampling=1)
        config = auto_trainer.get_best_config()
        # past_seq_len was fixed above, so it must come back unchanged
        assert config["past_seq_len"] == 7
Example #7
0
def create_linear_search_space():
    """Build the search space for the linear (fully-connected) model."""
    space = {"dropout": hp.uniform(0.2, 0.3)}
    # two fully-connected layer sizes plus training hyper-params
    space["fc1_size"] = hp.choice([50, 64])
    space["fc2_size"] = hp.choice([100, 128])
    space[LR_NAME] = hp.choice([0.001, 0.003, 0.01])
    space["batch_size"] = hp.choice([32, 64])
    return space
Example #8
0
 def search_space(self, all_available_features):
     """Return the hyper-parameter search space.

     ``all_available_features`` is accepted for interface compatibility
     but not used by this space.
     """
     from zoo.orca.automl import hp

     space = {
         "dropout": hp.uniform(0.2, 0.3),
         "fc1_size": hp.choice([50, 64]),
         "fc2_size": hp.choice([100, 128]),
     }
     space[LR_NAME] = hp.choice([0.001, 0.003, 0.01])
     space["batch_size"] = hp.choice([32, 64])
     return space
Example #9
0
 def search_space(self):
     """Fixed LSTM config with searchable layer sizes and dropouts."""
     space = {"model": "LSTM"}
     # two stacked LSTM layers, each with its own dropout
     space["lstm_1_units"] = hp.choice([32, 64])
     space["dropout_1"] = hp.uniform(0.2, 0.5)
     space["lstm_2_units"] = hp.choice([32, 64])
     space["dropout_2"] = hp.uniform(0.2, 0.5)
     # fixed training settings
     space.update({"lr": 0.001, "batch_size": 1024, "epochs": 1,
                   "past_seq_len": 2})
     return space
Example #10
0
 def search_space(self, all_available_features):
     """LSTM search space; every available feature is always selected."""
     # selected_features must be JSON-serialized for the config store
     space = {
         "selected_features": json.dumps(all_available_features),
         "model": "LSTM",
     }
     space["lstm_1_units"] = hp.choice([32, 64])
     space["dropout_1"] = hp.uniform(0.2, 0.5)
     space["lstm_2_units"] = hp.choice([32, 64])
     space["dropout_2"] = hp.uniform(0.2, 0.5)
     # fixed training settings
     space.update({"lr": 0.001, "batch_size": 1024, "epochs": 1,
                   "past_seq_len": 2})
     return space
Example #11
0
    def __init__(
            self,
            num_rand_samples=10,
            n_estimators_range=(50, 1000),
            max_depth_range=(2, 15),
            lr=(1e-4, 1e-1),
            min_child_weight=[1, 2, 3],
    ):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled
          randomly
        :param n_estimators_range: range of number of gradient boosted trees.
        :param max_depth_range: range of max tree depth
        :param lr: (low, high) range for log-uniform learning-rate sampling
        :param min_child_weight: minimum sum of instance weight(hessian)
          needed in a child.
        """
        # super(self.__class__, self) recurses infinitely when a subclass
        # inherits this __init__; the zero-argument form is the correct
        # Python 3 idiom.
        super().__init__()

        self.num_samples = num_rand_samples

        # ranges are kept as-is; sampling happens in the search space
        self.n_estimators_range = n_estimators_range
        self.max_depth_range = max_depth_range
        self.lr = hp.loguniform(lr[0], lr[1])
        self.min_child_weight = hp.choice(min_child_weight)
Example #12
0
 def test_fit(self):
     """Smoke-test AutoARIMA tuning over (p, q) and seasonal parameters."""
     data, validation_data = get_data()
     auto_arima = AutoARIMA(metric="mse",
                            p=hp.randint(0, 4),
                            q=hp.randint(0, 4),
                            seasonality_mode=hp.choice([True, False]),
                            P=hp.randint(5, 12),
                            Q=hp.randint(5, 12),
                            m=hp.choice([4, 7]))
     auto_arima.fit(
         data=data,
         validation_data=validation_data,
         epochs=1,
         n_sampling=1,
     )
     # only checks that a best model can be retrieved after fitting
     best_model = auto_arima.get_best_model()
def create_lstm_search_space(input_dim):
    """Search space for the LSTM model; ``input_dim`` is passed through."""
    space = {"lr": hp.uniform(0.001, 0.01)}
    space["batch_size"] = hp.choice([32, 64])
    # input dimension is fixed by the caller; one output target
    space["input_dim"] = input_dim
    space["output_dim"] = 1
    return space
Example #14
0
    def __init__(self,
                 num_rand_samples=1,
                 n_estimators=[8, 15],
                 max_depth=[10, 15],
                 n_jobs=-1,
                 tree_method='hist',
                 random_state=2,
                 seed=0,
                 lr=(1e-4, 1e-1),
                 subsample=0.8,
                 colsample_bytree=0.8,
                 min_child_weight=[1, 2, 3],
                 gamma=0,
                 reg_alpha=0,
                 reg_lambda=1):
        """
        Constructor. For XGBoost hyper parameter semantics, refer to
        https://xgboost.readthedocs.io/en/latest/python/python_api.html.

        :param num_rand_samples: number of hyper-param configurations sampled
          randomly
        :param n_estimators: grid search candidates for number of trees
        :param max_depth: grid search candidates for max tree depth
        :param n_jobs: number of parallel threads used to run xgboost
        :param tree_method: specify which tree method to use
        :param random_state: random number seed
        :param seed: seed used to generate the folds
        :param lr: (low, high) range for log-uniform learning-rate sampling
        :param subsample: subsample ratio of the training instance
        :param colsample_bytree: subsample ratio of columns per tree
        :param min_child_weight: candidates for minimum sum of instance
          weight (hessian) needed in a child
        :param gamma: minimum loss reduction required for a further split
        :param reg_alpha: L1 regularization term on weights
        :param reg_lambda: L2 regularization term on weights
        """
        # super(self.__class__, self) breaks under subclassing; use the
        # zero-argument Python 3 form.
        super().__init__()

        self.num_samples = num_rand_samples
        self.n_jobs = n_jobs
        self.tree_method = tree_method
        self.random_state = random_state
        self.seed = seed

        self.colsample_bytree = colsample_bytree
        self.gamma = gamma
        self.reg_alpha = reg_alpha
        self.reg_lambda = reg_lambda

        self.n_estimators = hp.grid_search(n_estimators)
        self.max_depth = hp.grid_search(max_depth)
        # lr[1] (was lr[-1]) for clarity and consistency with sibling recipes
        self.lr = hp.loguniform(lr[0], lr[1])
        self.subsample = subsample
        self.min_child_weight = hp.choice(min_child_weight)
    def test_fit_third_party_data_creator(self):
        """Fit AutoTSEstimator with a user-supplied model creator and
        data-creator input; the fixed past_seq_len must be preserved."""
        input_feature_dim = 4
        output_feature_dim = 2  # 2 targets are generated in get_tsdataset

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'dropout': hp.uniform(0.1, 0.2)
        }

        auto_estimator = AutoTSEstimator(model=model_creator,
                                         search_space=search_space,
                                         past_seq_len=7,
                                         future_seq_len=1,
                                         input_feature_num=input_feature_dim,
                                         output_target_num=output_feature_dim,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         cpus_per_trial=2)

        auto_estimator.fit(data=get_data_creator(),
                           epochs=1,
                           batch_size=hp.choice([32, 64]),
                           validation_data=get_data_creator(),
                           n_sampling=1)

        config = auto_estimator.get_best_config()
        # past_seq_len was fixed, so it must come back unchanged
        assert config["past_seq_len"] == 7
    def test_fit_third_party_feature(self):
        """End-to-end AutoTSEstimator run with a third-party model creator:
        fit, predict via the raw model and via the pipeline, save/load,
        and incremental training."""
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        tsdata_train = get_tsdataset().gen_dt_feature().scale(scaler, fit=True)
        tsdata_valid = get_tsdataset().gen_dt_feature().scale(scaler,
                                                              fit=False)

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'dropout': hp.uniform(0.1, 0.2)
        }

        auto_estimator = AutoTSEstimator(model=model_creator,
                                         search_space=search_space,
                                         past_seq_len=hp.randint(4, 6),
                                         future_seq_len=1,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         cpus_per_trial=2)

        ts_pipeline = auto_estimator.fit(data=tsdata_train,
                                         epochs=1,
                                         batch_size=hp.choice([32, 64]),
                                         validation_data=tsdata_valid,
                                         n_sampling=1)
        best_config = auto_estimator.get_best_config()
        best_model = auto_estimator._get_best_automl_model()
        assert 4 <= best_config["past_seq_len"] <= 6

        assert isinstance(ts_pipeline, TSPipeline)

        # use raw base model to predict and evaluate
        tsdata_valid.roll(lookback=best_config["past_seq_len"],
                          horizon=0,
                          feature_col=best_config["selected_features"])
        x_valid, y_valid = tsdata_valid.to_numpy()
        y_pred_raw = best_model.predict(x_valid)
        y_pred_raw = tsdata_valid.unscale_numpy(y_pred_raw)

        # use tspipeline to predict and evaluate
        eval_result = ts_pipeline.evaluate(tsdata_valid)
        y_pred = ts_pipeline.predict(tsdata_valid)

        # check if they are the same
        np.testing.assert_almost_equal(y_pred, y_pred_raw)

        # save and load
        ts_pipeline.save("/tmp/auto_trainer/autots_tmp_model_3rdparty")
        new_ts_pipeline = TSPipeline.load(
            "/tmp/auto_trainer/autots_tmp_model_3rdparty")

        # check that the loaded pipeline behaves like the original
        eval_result_new = new_ts_pipeline.evaluate(tsdata_valid)
        y_pred_new = new_ts_pipeline.predict(tsdata_valid)
        np.testing.assert_almost_equal(eval_result[0], eval_result_new[0])
        np.testing.assert_almost_equal(y_pred, y_pred_new)

        # use tspipeline to incrementally train
        new_ts_pipeline.fit(tsdata_valid)
Example #17
0
 def search_space(self):
     """Return the search space for this model.

     NOTE(review): ``input_dim`` is not defined in this method's scope —
     presumably it comes from the enclosing module or class; verify.
     """
     return {
         "lr": hp.uniform(0.001, 0.01),
         "batch_size": hp.choice([32, 64]),
         "input_dim": input_dim,
         "output_dim": 1
     }
Example #18
0
    def __init__(self,
                 num_rand_samples=1,
                 epochs=5,
                 training_iteration=10,
                 look_back=2,
                 latent_dim=[32, 64, 128, 256],
                 batch_size=[32, 64]):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param epochs: no. of epochs to train in each iteration
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param look_back: the length to look back, either a tuple with 2 int values,
          which is in format is (min len, max len), or a single int, which is
          a fixed length to look back.
        :param latent_dim: random search candidates for the latent dimension
        :param batch_size: grid search candidates for batch size
        """
        # super(self.__class__, self) recurses infinitely if subclassed;
        # use the zero-argument Python 3 form instead.
        super().__init__()
        # -- runtime params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- model params
        self.past_seq_config = PastSeqParamHandler.get_past_seq_config(
            look_back)
        self.latent_dim = hp.choice(latent_dim)
        self.dropout_config = hp.uniform(0.2, 0.5)

        # -- optimization params
        self.lr = hp.uniform(0.001, 0.01)
        self.batch_size = hp.grid_search(batch_size)
        self.epochs = epochs
Example #19
0
def get_auto_estimator():
    """Build an AutoTCN estimator preconfigured with the test search space."""
    return AutoTCN(input_feature_num=input_feature_dim,
                   output_target_num=output_feature_dim,
                   past_seq_len=past_seq_len,
                   future_seq_len=future_seq_len,
                   optimizer='Adam',
                   loss=torch.nn.MSELoss(),
                   metric="mse",
                   hidden_units=8,
                   levels=hp.randint(1, 3),
                   kernel_size=hp.choice([2, 3]),
                   lr=hp.choice([0.001, 0.003, 0.01]),
                   dropout=hp.uniform(0.1, 0.2),
                   logs_dir="/tmp/auto_tcn",
                   cpus_per_trial=2,
                   name="auto_tcn")
Example #20
0
    def __init__(self,
                 num_rand_samples=1,
                 n_estimators=[8, 15],
                 max_depth=[10, 15],
                 n_jobs=-1,
                 tree_method='hist',
                 random_state=2,
                 seed=0,
                 lr=(1e-4, 1e-1),
                 subsample=0.8,
                 colsample_bytree=0.8,
                 min_child_weight=[1, 2, 3],
                 gamma=0,
                 reg_alpha=0,
                 reg_lambda=1):
        """
        Constructor. For XGBoost hyper parameters, refer to
        https://xgboost.readthedocs.io/en/latest/python/python_api.html for
        details.

        :param num_rand_samples: number of hyper-param configurations sampled
          randomly
        :param n_estimators: number of gradient boosted trees.
        :param max_depth: max tree depth
        :param n_jobs: number of parallel threads used to run xgboost.
        :param tree_method: specify which tree method to use.
        :param random_state: random number seed.
        :param seed: seed used to generate the folds
        :param lr: (low, high) range for log-uniform learning-rate sampling
        :param subsample: subsample ratio of the training instance
        :param colsample_bytree: subsample ratio of columns when constructing
          each tree.
        :param min_child_weight: minimum sum of instance weight(hessian)
          needed in a child.
        :param gamma: minimum loss reduction required to make a further
          partition on a leaf node of the tree.
        :param reg_alpha: L1 regularization term on weights (xgb's alpha).
        :param reg_lambda: L2 regularization term on weights (xgb's lambda).
        """
        # super(self.__class__, self) breaks under subclassing; use the
        # zero-argument Python 3 form.
        super().__init__()

        self.num_samples = num_rand_samples
        self.n_jobs = n_jobs
        self.tree_method = tree_method
        self.random_state = random_state
        self.seed = seed

        self.colsample_bytree = colsample_bytree
        self.gamma = gamma
        self.reg_alpha = reg_alpha
        self.reg_lambda = reg_lambda

        self.n_estimators = hp.grid_search(n_estimators)
        self.max_depth = hp.grid_search(max_depth)
        # lr[1] (was lr[-1]) for clarity and consistency with sibling recipes
        self.lr = hp.loguniform(lr[0], lr[1])
        self.subsample = subsample
        self.min_child_weight = hp.choice(min_child_weight)
    def test_select_feature(self):
        """Fit AutoTSEstimator on a synthetic single-id series with automatic
        feature selection; the fixed past_seq_len must be preserved."""
        sample_num = np.random.randint(100, 200)
        df = pd.DataFrame({
            "datetime":
            pd.date_range('1/1/2019', periods=sample_num),
            "value":
            np.random.randn(sample_num),
            "id":
            np.array(['00'] * sample_num)
        })
        # 90/10 train/validation split on the time axis
        train_ts, val_ts, _ = TSDataset.from_pandas(df,
                                                    target_col=['value'],
                                                    dt_col='datetime',
                                                    id_col='id',
                                                    with_split=True,
                                                    val_ratio=0.1)

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'layer_num': hp.randint(1, 3),
            'lr': hp.choice([0.001, 0.003, 0.01]),
            'dropout': hp.uniform(0.1, 0.2)
        }

        input_feature_dim, output_feature_dim = 1, 1
        auto_estimator = AutoTSEstimator(model='lstm',
                                         search_space=search_space,
                                         past_seq_len=6,
                                         future_seq_len=1,
                                         input_feature_num=input_feature_dim,
                                         output_target_num=output_feature_dim,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         cpus_per_trial=2,
                                         name="auto_trainer")

        auto_estimator.fit(data=train_ts,
                           epochs=1,
                           batch_size=hp.choice([32, 64]),
                           validation_data=val_ts,
                           n_sampling=1)
        config = auto_estimator.get_best_config()
        # past_seq_len was fixed, so it must come back unchanged
        assert config['past_seq_len'] == 6
Example #22
0
    def __init__(self,
                 num_rand_samples=1,
                 epochs=5,
                 training_iteration=10,
                 time_step=[3, 4],
                 long_num=[3, 4],
                 cnn_height=[2, 3],
                 cnn_hid_size=[32, 50, 100],
                 ar_size=[2, 3],
                 batch_size=[32, 64]):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param epochs: no. of epochs to train in each iteration
        :param time_step: random search candidates for model param "time_step"
        :param long_num: random search candidates for model param "long_num"
        :param ar_size: random search candidates for model param "ar_size"
        :param batch_size: grid search candidates for batch size
        :param cnn_height: random search candidates for model param "cnn_height"
        :param cnn_hid_size: random search candidates for model param "cnn_hid_size"
        """
        # super(self.__class__, self) recurses infinitely if subclassed;
        # use the zero-argument Python 3 form.
        super().__init__()
        # -- run time params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- optimization params
        self.lr = hp.uniform(0.001, 0.01)
        self.batch_size = hp.grid_search(batch_size)
        self.epochs = epochs

        # ---- model params
        self.cnn_dropout = hp.uniform(0.2, 0.5)
        self.rnn_dropout = hp.uniform(0.2, 0.5)
        self.time_step = hp.choice(time_step)
        self.long_num = hp.choice(long_num)
        self.cnn_height = hp.choice(cnn_height)
        self.cnn_hid_size = hp.choice(cnn_hid_size)
        self.ar_size = hp.choice(ar_size)
        # the model's receptive field is derived from the sampled values
        self.past_seq_len = hp.sample_from(
            lambda spec: (spec.config.long_num + 1) * spec.config.time_step)
Example #23
0
 def search_space(self):
     """XGBRegressor search space built from recipe-level attributes."""
     space = {
         # -------- feature related parameters
         "model": "XGBRegressor",
         "imputation": hp.choice(["LastFillImpute", "FillZeroImpute"]),
     }
     # model hyper-params were prepared in the constructor
     space["n_estimators"] = self.n_estimators
     space["max_depth"] = self.max_depth
     space["min_child_weight"] = self.min_child_weight
     space["lr"] = self.lr
     return space
Example #24
0
    def search_space(self):
        """Joint LSTM/Seq2seq search space plus optimization parameters."""
        space = {"model": hp.choice(["LSTM", "Seq2seq"])}
        # --------- Vanilla LSTM model parameters
        space["lstm_1_units"] = hp.choice([8, 16, 32, 64, 128])
        space["dropout_1"] = hp.uniform(0.2, 0.5)
        space["lstm_2_units"] = hp.choice([8, 16, 32, 64, 128])
        space["dropout_2"] = hp.uniform(0.2, 0.5)
        # ----------- Seq2Seq model parameters
        space["latent_dim"] = hp.choice([32, 64, 128, 256])
        space["dropout"] = hp.uniform(0.2, 0.5)
        # ----------- optimization parameters
        space["lr"] = hp.uniform(0.001, 0.01)
        space["batch_size"] = hp.choice([32, 64, 1024])
        space["epochs"] = self.epochs
        space["past_seq_len"] = self.past_seq_config
        return space
Example #25
0
    def search_space(self, all_available_features):
        """Search space including random feature-subset selection."""
        # Sample a random subset of 3..len-1 distinct features;
        # json-encoded because configs must be serializable.
        selected = hp.sample_from(lambda spec: json.dumps(
            list(np.random.choice(
                all_available_features,
                size=np.random.randint(low=3,
                                       high=len(all_available_features)),
                replace=False))))
        return {
            # -------- feature related parameters
            "selected_features": selected,
            # -------- model selection TODO add MTNet
            "model": hp.choice(["LSTM", "Seq2seq"]),
            # --------- Vanilla LSTM model parameters
            "lstm_1_units": hp.grid_search([16, 32]),
            "dropout_1": 0.2,
            "lstm_2_units": hp.grid_search([16, 32]),
            "dropout_2": hp.uniform(0.2, 0.5),
            # ----------- Seq2Seq model parameters
            "latent_dim": hp.grid_search([32, 64]),
            "dropout": hp.uniform(0.2, 0.5),
            # ----------- optimization parameters
            "lr": hp.uniform(0.001, 0.01),
            "batch_size": hp.choice([32, 64]),
            "epochs": self.epochs,
            "past_seq_len": self.past_seq_config,
        }
Example #26
0
    def search_space(self):
        """LSTM/Seq2seq model-selection space with grid-searched sizes."""
        lstm_params = {
            # --------- Vanilla LSTM model parameters
            "lstm_1_units": hp.grid_search([16, 32]),
            "dropout_1": 0.2,
            "lstm_2_units": hp.grid_search([16, 32]),
            "dropout_2": hp.uniform(0.2, 0.5),
        }
        seq2seq_params = {
            # ----------- Seq2Seq model parameters
            "latent_dim": hp.grid_search([32, 64]),
            "dropout": hp.uniform(0.2, 0.5),
        }
        optim_params = {
            # ----------- optimization parameters
            "lr": hp.uniform(0.001, 0.01),
            "batch_size": hp.choice([32, 64]),
            "epochs": self.epochs,
            "past_seq_len": self.past_seq_config,
        }
        # -------- model selection TODO add MTNet
        space = {"model": hp.choice(["LSTM", "Seq2seq"])}
        space.update(lstm_params)
        space.update(seq2seq_params)
        space.update(optim_params)
        return space
Example #27
0
 def test_fit_np(self):
     """Fit AutoLSTM on in-memory numpy data and validate the best config."""
     auto_lstm = get_auto_estimator()
     auto_lstm.fit(data=get_x_y(size=1000),
                   epochs=1,
                   batch_size=hp.choice([32, 64]),
                   validation_data=get_x_y(size=400),
                   n_sampling=1)
     assert auto_lstm.get_best_model()
     best_config = auto_lstm.get_best_config()
     # assertions mirror the search ranges declared in get_auto_estimator
     assert 0.1 <= best_config['dropout'] <= 0.2
     assert best_config['batch_size'] in (32, 64)
     assert 1 <= best_config['layer_num'] < 3
Example #28
0
 def test_fit_data_creator(self):
     """Fit AutoLSTM from dataloader creators and validate the best config."""
     auto_lstm = get_auto_estimator()
     auto_lstm.fit(data=train_dataloader_creator,
                   epochs=1,
                   batch_size=hp.choice([32, 64]),
                   validation_data=valid_dataloader_creator,
                   n_sampling=1)
     assert auto_lstm.get_best_model()
     best_config = auto_lstm.get_best_config()
     # assertions mirror the search ranges declared in get_auto_estimator
     assert 0.1 <= best_config['dropout'] <= 0.2
     assert best_config['batch_size'] in (32, 64)
     assert 1 <= best_config['layer_num'] < 3
Example #29
0
    def test_fit_tcn_feature(self):
        """Fit AutoTSTrainer (TCN) with generated datetime features, then
        verify that a checkpoint saved from the best model restores to a
        model producing identical predictions."""
        input_feature_dim = 11  # This param will not be used
        output_feature_dim = 2  # 2 targets are generated in get_tsdataset

        tsdata_train = get_tsdataset().gen_dt_feature()
        tsdata_valid = get_tsdataset().gen_dt_feature()
        tsdata_test = get_tsdataset().gen_dt_feature()

        search_space = {
            'hidden_units': hp.grid_search([32, 64]),
            'levels': hp.randint(4, 6),
            'kernel_size': hp.randint(3, 5),
            'dropout': hp.uniform(0.1, 0.2),
            'lr': hp.loguniform(0.001, 0.01)
        }
        auto_trainer = AutoTSTrainer(model='tcn',
                                     search_space=search_space,
                                     past_seq_len=hp.randint(4, 6),
                                     future_seq_len=1,
                                     input_feature_num=input_feature_dim,
                                     output_target_num=output_feature_dim,
                                     selected_features="auto",
                                     metric="mse",
                                     optimizer="Adam",
                                     loss=torch.nn.MSELoss(),
                                     logs_dir="/tmp/auto_trainer",
                                     cpus_per_trial=2,
                                     name="auto_trainer")
        auto_trainer.fit(data=tsdata_train,
                         epochs=1,
                         batch_size=hp.choice([32, 64]),
                         validation_data=tsdata_valid,
                         n_sampling=1)
        best_config = auto_trainer.get_best_config()
        best_model = auto_trainer.get_best_model()
        assert 4 <= best_config["past_seq_len"] <= 6

        # the raw model requires manual rolling with the tuned lookback
        # and feature selection before it can be used for prediction
        tsdata_test.roll(lookback=best_config["past_seq_len"],
                         horizon=1,
                         feature_col=best_config["selected_features"])
        x_test, y_test = tsdata_test.to_numpy()
        y_pred = best_model.predict(x_test)
        best_model.save("best.ckpt")
        from zoo.automl.model.base_pytorch_model import PytorchModelBuilder
        # rebuild from the best config and restore weights from checkpoint
        restore_model = PytorchModelBuilder(
            model_creator=best_model.model_creator,
            optimizer_creator="Adam",
            loss_creator=torch.nn.MSELoss()).build(best_config)
        restore_model.restore("best.ckpt")
        y_pred_restore = restore_model.predict(x_test)
        # restored model must reproduce the original predictions
        np.testing.assert_almost_equal(y_pred, y_pred_restore)
Example #30
0
 def test_fit_np(self):
     """Fit AutoTCN on numpy arrays and validate the best model's config."""
     auto_tcn = get_auto_estimator()
     auto_tcn.fit(
         data=get_x_y(size=1000),
         epochs=1,
         batch_size=hp.choice([32, 64]),
         validation_data=get_x_y(size=400),
         n_sampling=1,
     )
     best_model = auto_tcn.get_best_model()
     # ranges mirror those declared in get_auto_estimator
     assert 0.1 <= best_model.config['dropout'] <= 0.2
     assert best_model.config['batch_size'] in (32, 64)
     assert 1 <= best_model.config['levels'] < 3