示例#1
0
    def __init__(self,
                 num_rand_samples=1,
                 n_estimators=[8, 15],
                 max_depth=[10, 15],
                 n_jobs=-1,
                 tree_method='hist',
                 random_state=2,
                 seed=0,
                 lr=(1e-4, 1e-1),
                 subsample=0.8,
                 colsample_bytree=0.8,
                 min_child_weight=[1, 2, 3],
                 gamma=0,
                 reg_alpha=0,
                 reg_lambda=1):
        """
        Constructor. Stores the fixed XGBoost settings and turns the
        searchable ones into hp search-space objects.

        :param num_rand_samples: stored as ``self.num_samples``
        :param n_estimators: candidates that are grid searched
        :param max_depth: candidates that are grid searched
        :param n_jobs: stored unchanged
        :param tree_method: stored unchanged
        :param random_state: stored unchanged
        :param seed: stored unchanged
        :param lr: sequence whose first and last elements are used as the
          lower and upper bound of a log-uniform sampling range
        :param subsample: stored unchanged
        :param colsample_bytree: stored unchanged
        :param min_child_weight: candidates passed to ``hp.choice``
        :param gamma: stored unchanged
        :param reg_alpha: stored unchanged
        :param reg_lambda: stored unchanged
        """
        # Zero-argument super() is bound to the defining class.
        # super(self.__class__, self) recurses forever as soon as this
        # class is subclassed, because self.__class__ is then the subclass.
        super().__init__()

        # -- fixed (non-searched) settings
        self.num_samples = num_rand_samples
        self.n_jobs = n_jobs
        self.tree_method = tree_method
        self.random_state = random_state
        self.seed = seed

        self.colsample_bytree = colsample_bytree
        self.gamma = gamma
        self.reg_alpha = reg_alpha
        self.reg_lambda = reg_lambda

        # -- searched settings
        # NOTE: the list defaults above are shared between calls; this
        # method only hands them to hp and never mutates them itself.
        self.n_estimators = hp.grid_search(n_estimators)
        self.max_depth = hp.grid_search(max_depth)
        self.lr = hp.loguniform(lr[0], lr[-1])
        self.subsample = subsample
        self.min_child_weight = hp.choice(min_child_weight)
示例#2
0
    def __init__(self,
                 num_rand_samples=1,
                 training_iteration=40,
                 batch_size=[256, 512],
                 hidden_size=[32, 48],
                 levels=[6, 8],
                 kernel_size=[3, 5],
                 dropout=[0, 0.1],
                 lr=[0.001, 0.003]):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param batch_size: grid search candidates for batch size
        :param hidden_size: grid search candidates for hidden size of each layer
        :param levels: grid search candidates for the number of layers
        :param kernel_size: grid search candidates for the kernel size of each layer
        :param dropout: candidates for dropout rate (1 - keep probability),
          passed to ``hp.choice``
        :param lr: candidates for learning rate, passed to ``hp.choice``
        """
        # Zero-argument super() is bound to the defining class.
        # super(self.__class__, self) recurses forever as soon as this
        # class is subclassed, because self.__class__ is then the subclass.
        super().__init__()
        # -- run time params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- optimization params
        self.lr = hp.choice(lr)
        self.batch_size = hp.grid_search(batch_size)

        # ---- model params
        self.hidden_size = hp.grid_search(hidden_size)
        self.levels = hp.grid_search(levels)
        self.kernel_size = hp.grid_search(kernel_size)
        self.dropout = hp.choice(dropout)
示例#3
0
    def __init__(self,
                 num_rand_samples=1,
                 n_estimators=[8, 15],
                 max_depth=[10, 15],
                 n_jobs=-1,
                 tree_method='hist',
                 random_state=2,
                 seed=0,
                 lr=(1e-4, 1e-1),
                 subsample=0.8,
                 colsample_bytree=0.8,
                 min_child_weight=[1, 2, 3],
                 gamma=0,
                 reg_alpha=0,
                 reg_lambda=1):
        """
        Constructor. For XGBoost hyper parameters, refer to
        https://xgboost.readthedocs.io/en/latest/python/python_api.html for
        details.

        :param num_rand_samples: number of hyper-param configurations sampled
          randomly
        :param n_estimators: grid search candidates for the number of
          gradient boosted trees.
        :param max_depth: grid search candidates for max tree depth
        :param n_jobs: number of parallel threads used to run xgboost.
        :param tree_method: specify which tree method to use.
        :param random_state: random number seed.
        :param seed: seed used to generate the folds
        :param lr: learning rate range; the first and last elements are used
          as the lower and upper bound of a log-uniform sampling range
        :param subsample: subsample ratio of the training instance
        :param colsample_bytree: subsample ratio of columns when constructing
          each tree.
        :param min_child_weight: candidates (passed to ``hp.choice``) for the
          minimum sum of instance weight(hessian) needed in a child.
        :param gamma: minimum loss reduction required to make a further
          partition on a leaf node of the tree.
        :param reg_alpha: L1 regularization term on weights (xgb's alpha).
        :param reg_lambda: L2 regularization term on weights (xgb's lambda).

        """
        # Zero-argument super() is bound to the defining class.
        # super(self.__class__, self) recurses forever as soon as this
        # class is subclassed, because self.__class__ is then the subclass.
        super().__init__()

        # -- fixed (non-searched) settings
        self.num_samples = num_rand_samples
        self.n_jobs = n_jobs
        self.tree_method = tree_method
        self.random_state = random_state
        self.seed = seed

        self.colsample_bytree = colsample_bytree
        self.gamma = gamma
        self.reg_alpha = reg_alpha
        self.reg_lambda = reg_lambda

        # -- searched settings
        self.n_estimators = hp.grid_search(n_estimators)
        self.max_depth = hp.grid_search(max_depth)
        self.lr = hp.loguniform(lr[0], lr[-1])
        self.subsample = subsample
        self.min_child_weight = hp.choice(min_child_weight)
    def test_fit_data_creator(self):
        """
        Fit an AutoLSTM from dataloader creators and verify that the best
        configuration lies inside the declared search space.
        """
        estimator_kwargs = dict(
            input_feature_num=input_feature_dim,
            output_target_num=output_feature_dim,
            past_seq_len=5,
            optimizer='Adam',
            loss=torch.nn.MSELoss(),
            metric="mse",
            hidden_dim=hp.grid_search([32, 64]),
            layer_num=hp.randint(1, 3),
            lr=hp.choice([0.001, 0.003, 0.01]),
            dropout=hp.uniform(0.1, 0.2),
            logs_dir="/tmp/auto_lstm",
            cpus_per_trial=2,
            name="auto_lstm",
        )
        estimator = AutoLSTM(**estimator_kwargs)

        batch_candidates = hp.choice([32, 64])
        estimator.fit(data=train_dataloader_creator,
                      epochs=1,
                      batch_size=batch_candidates,
                      validation_data=valid_dataloader_creator,
                      n_sampling=1)

        # a best model must exist after the search
        assert estimator.get_best_model()

        # every sampled value must fall within its search range
        chosen = estimator.get_best_config()
        assert 0.1 <= chosen['dropout'] <= 0.2
        assert chosen['batch_size'] in (32, 64)
        assert 1 <= chosen['layer_num'] < 3
    def test_fit_third_party_data_creator(self):
        """
        Fit an AutoTSEstimator built from a model creator on data creators
        and check that the fixed past_seq_len is kept in the best config.
        """
        n_features = 4
        n_targets = 2  # 2 targets are generated in get_tsdataset

        model_space = {}
        model_space['hidden_dim'] = hp.grid_search([32, 64])
        model_space['dropout'] = hp.uniform(0.1, 0.2)

        estimator = AutoTSEstimator(
            model=model_creator,
            search_space=model_space,
            past_seq_len=7,
            future_seq_len=1,
            input_feature_num=n_features,
            output_target_num=n_targets,
            selected_features="auto",
            metric="mse",
            loss=torch.nn.MSELoss(),
            cpus_per_trial=2,
        )

        # created in the same order as the original keyword arguments
        train_creator = get_data_creator()
        batch_candidates = hp.choice([32, 64])
        valid_creator = get_data_creator()
        estimator.fit(data=train_creator,
                      epochs=1,
                      batch_size=batch_candidates,
                      validation_data=valid_creator,
                      n_sampling=1)

        best = estimator.get_best_config()
        assert best["past_seq_len"] == 7
    def test_fit_third_party_feature(self):
        """
        Fit an AutoTSEstimator (model given as a creator function) on scaled
        TSDataset objects, then check that the returned TSPipeline predicts
        the same values as the raw best model and survives a save/load
        round trip.
        """
        from sklearn.preprocessing import StandardScaler
        # the scaler is fitted on the train set (fit=True) and the same
        # scaler object is reused for the validation set (fit=False)
        scaler = StandardScaler()
        tsdata_train = get_tsdataset().gen_dt_feature().scale(scaler, fit=True)
        tsdata_valid = get_tsdataset().gen_dt_feature().scale(scaler,
                                                              fit=False)

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'dropout': hp.uniform(0.1, 0.2)
        }

        # past_seq_len is searched as well; no input/output feature numbers
        # are passed in this test
        auto_estimator = AutoTSEstimator(model=model_creator,
                                         search_space=search_space,
                                         past_seq_len=hp.randint(4, 6),
                                         future_seq_len=1,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         cpus_per_trial=2)

        ts_pipeline = auto_estimator.fit(data=tsdata_train,
                                         epochs=1,
                                         batch_size=hp.choice([32, 64]),
                                         validation_data=tsdata_valid,
                                         n_sampling=1)
        best_config = auto_estimator.get_best_config()
        best_model = auto_estimator._get_best_automl_model()
        assert 4 <= best_config["past_seq_len"] <= 6

        assert isinstance(ts_pipeline, TSPipeline)

        # use raw base model to predict and evaluate
        tsdata_valid.roll(lookback=best_config["past_seq_len"],
                          horizon=0,
                          feature_col=best_config["selected_features"])
        x_valid, y_valid = tsdata_valid.to_numpy()
        y_pred_raw = best_model.predict(x_valid)
        y_pred_raw = tsdata_valid.unscale_numpy(y_pred_raw)

        # use tspipeline to predict and evaluate
        eval_result = ts_pipeline.evaluate(tsdata_valid)
        y_pred = ts_pipeline.predict(tsdata_valid)

        # check if they are the same
        np.testing.assert_almost_equal(y_pred, y_pred_raw)

        # save and load
        ts_pipeline.save("/tmp/auto_trainer/autots_tmp_model_3rdparty")
        new_ts_pipeline = TSPipeline.load(
            "/tmp/auto_trainer/autots_tmp_model_3rdparty")

        # check if the loaded pipeline is the same as the previous one
        eval_result_new = new_ts_pipeline.evaluate(tsdata_valid)
        y_pred_new = new_ts_pipeline.predict(tsdata_valid)
        np.testing.assert_almost_equal(eval_result[0], eval_result_new[0])
        np.testing.assert_almost_equal(y_pred, y_pred_new)

        # use tspipeline to incrementally train
        new_ts_pipeline.fit(tsdata_valid)
示例#7
0
    def __init__(self,
                 num_rand_samples=1,
                 epochs=5,
                 training_iteration=10,
                 look_back=2,
                 latent_dim=[32, 64, 128, 256],
                 batch_size=[32, 64]):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param epochs: no. of epochs to train in each iteration
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param look_back: the length to look back, either a tuple with 2 int values,
          which is in format is (min len, max len), or a single int, which is
          a fixed length to look back.
        :param latent_dim: candidates for latent_dim, passed to ``hp.choice``
        :param batch_size: grid search candidates for batch size

        The dropout (uniform in [0.2, 0.5]) and learning rate (uniform in
        [0.001, 0.01]) ranges are fixed and not configurable here.
        """
        # Zero-argument super() is bound to the defining class.
        # super(self.__class__, self) recurses forever as soon as this
        # class is subclassed, because self.__class__ is then the subclass.
        super().__init__()
        # -- runtime params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- model params
        self.past_seq_config = PastSeqParamHandler.get_past_seq_config(
            look_back)
        self.latent_dim = hp.choice(latent_dim)
        self.dropout_config = hp.uniform(0.2, 0.5)

        # -- optimization params
        self.lr = hp.uniform(0.001, 0.01)
        self.batch_size = hp.grid_search(batch_size)
        self.epochs = epochs
    def test_fit_lstm_data_creator(self):
        """
        Fit an AutoTSTrainer with the built-in 'lstm' model on data creators
        and check that the fixed past_seq_len is kept in the best config.
        """
        n_features = 4
        n_targets = 2  # 2 targets are generated in get_tsdataset

        lstm_space = {}
        lstm_space['hidden_dim'] = hp.grid_search([32, 64])
        lstm_space['layer_num'] = hp.randint(1, 3)
        lstm_space['lr'] = hp.choice([0.001, 0.003, 0.01])
        lstm_space['dropout'] = hp.uniform(0.1, 0.2)

        trainer = AutoTSTrainer(
            model='lstm',
            search_space=lstm_space,
            past_seq_len=7,
            future_seq_len=1,
            input_feature_num=n_features,
            output_target_num=n_targets,
            selected_features="auto",
            metric="mse",
            loss=torch.nn.MSELoss(),
            logs_dir="/tmp/auto_trainer",
            cpus_per_trial=2,
            name="auto_trainer",
        )

        # created in the same order as the original keyword arguments
        train_creator = get_data_creator()
        batch_candidates = hp.choice([32, 64])
        valid_creator = get_data_creator()
        trainer.fit(data=train_creator,
                    epochs=1,
                    batch_size=batch_candidates,
                    validation_data=valid_creator,
                    n_sampling=1)

        best = trainer.get_best_config()
        assert best["past_seq_len"] == 7
示例#9
0
 def _prepare_tune_config(self, space):
     tune_config = {}
     for k, v in space.items():
         if isinstance(v, RandomSample):
             tune_config[k] = hp.sample_from(v.func)
         elif isinstance(v, GridSearch):
             tune_config[k] = hp.grid_search(v.values)
         else:
             tune_config[k] = v
     return tune_config
示例#10
0
    def search_space(self, all_available_features):
        """
        Assemble the search space of this recipe.

        :param all_available_features: candidate feature names from which a
          random subset is drawn for each sample
        :return: dict mapping parameter names to fixed values or hp samplers
        """
        def _sample_feature_subset(spec):
            # The subset size is drawn first, then that many features are
            # picked without replacement and serialized as a JSON list.
            subset_size = np.random.randint(low=3,
                                            high=len(all_available_features))
            picked = np.random.choice(all_available_features,
                                      size=subset_size,
                                      replace=False)
            return json.dumps(list(picked))

        space = {}

        # -------- feature related parameters
        space["selected_features"] = hp.sample_from(_sample_feature_subset)

        # -------- model selection TODO add MTNet
        space["model"] = hp.choice(["LSTM", "Seq2seq"])

        # --------- Vanilla LSTM model parameters
        space["lstm_1_units"] = hp.grid_search([16, 32])
        space["dropout_1"] = 0.2
        space["lstm_2_units"] = hp.grid_search([16, 32])
        space["dropout_2"] = hp.uniform(0.2, 0.5)

        # ----------- Seq2Seq model parameters
        space["latent_dim"] = hp.grid_search([32, 64])
        space["dropout"] = hp.uniform(0.2, 0.5)

        # ----------- optimization parameters
        space["lr"] = hp.uniform(0.001, 0.01)
        space["batch_size"] = hp.choice([32, 64])
        space["epochs"] = self.epochs
        space["past_seq_len"] = self.past_seq_config

        return space
示例#11
0
    def search_space(self):
        """
        Assemble the search space of this recipe.

        :return: dict mapping parameter names to fixed values or hp samplers
        """
        # -------- model selection TODO add MTNet
        model_choice = {"model": hp.choice(["LSTM", "Seq2seq"])}

        # --------- Vanilla LSTM model parameters
        lstm_params = {
            "lstm_1_units": hp.grid_search([16, 32]),
            "dropout_1": 0.2,
            "lstm_2_units": hp.grid_search([16, 32]),
            "dropout_2": hp.uniform(0.2, 0.5),
        }

        # ----------- Seq2Seq model parameters
        seq2seq_params = {
            "latent_dim": hp.grid_search([32, 64]),
            "dropout": hp.uniform(0.2, 0.5),
        }

        # ----------- optimization parameters
        optimization_params = {
            "lr": hp.uniform(0.001, 0.01),
            "batch_size": hp.choice([32, 64]),
            "epochs": self.epochs,
            "past_seq_len": self.past_seq_config,
        }

        # the groups share no keys, so merging keeps the original key order
        return {
            **model_choice,
            **lstm_params,
            **seq2seq_params,
            **optimization_params,
        }
示例#12
0
    def test_fit_tcn_feature(self):
        """
        Fit an AutoTSTrainer with the built-in 'tcn' model on TSDataset
        objects, then check that a model rebuilt from the best config and
        restored from a checkpoint predicts the same values as the best
        model.
        """
        input_feature_dim = 11  # This param will not be used
        output_feature_dim = 2  # 2 targets are generated in get_tsdataset

        tsdata_train = get_tsdataset().gen_dt_feature()
        tsdata_valid = get_tsdataset().gen_dt_feature()
        tsdata_test = get_tsdataset().gen_dt_feature()

        search_space = {
            'hidden_units': hp.grid_search([32, 64]),
            'levels': hp.randint(4, 6),
            'kernel_size': hp.randint(3, 5),
            'dropout': hp.uniform(0.1, 0.2),
            'lr': hp.loguniform(0.001, 0.01)
        }
        auto_trainer = AutoTSTrainer(model='tcn',
                                     search_space=search_space,
                                     past_seq_len=hp.randint(4, 6),
                                     future_seq_len=1,
                                     input_feature_num=input_feature_dim,
                                     output_target_num=output_feature_dim,
                                     selected_features="auto",
                                     metric="mse",
                                     optimizer="Adam",
                                     loss=torch.nn.MSELoss(),
                                     logs_dir="/tmp/auto_trainer",
                                     cpus_per_trial=2,
                                     name="auto_trainer")
        auto_trainer.fit(data=tsdata_train,
                         epochs=1,
                         batch_size=hp.choice([32, 64]),
                         validation_data=tsdata_valid,
                         n_sampling=1)
        best_config = auto_trainer.get_best_config()
        best_model = auto_trainer.get_best_model()
        assert 4 <= best_config["past_seq_len"] <= 6

        # really difficult to use the model currently...
        # roll the test set with the searched lookback and feature selection
        # so its numpy form matches what the best model was trained on
        tsdata_test.roll(lookback=best_config["past_seq_len"],
                         horizon=1,
                         feature_col=best_config["selected_features"])
        x_test, y_test = tsdata_test.to_numpy()
        y_pred = best_model.predict(x_test)
        # NOTE: the checkpoint path is relative, i.e. written to the
        # current working directory
        best_model.save("best.ckpt")
        from zoo.automl.model.base_pytorch_model import PytorchModelBuilder
        # rebuild a model from the best config, restore the checkpoint and
        # verify the predictions are reproduced
        restore_model = PytorchModelBuilder(
            model_creator=best_model.model_creator,
            optimizer_creator="Adam",
            loss_creator=torch.nn.MSELoss()).build(best_config)
        restore_model.restore("best.ckpt")
        y_pred_restore = restore_model.predict(x_test)
        np.testing.assert_almost_equal(y_pred, y_pred_restore)
示例#13
0
def get_auto_estimator():
    """
    Build the AutoLSTM estimator used by the tests.

    :return: an AutoLSTM configured with a small search space
    """
    estimator_kwargs = dict(
        input_feature_num=input_feature_dim,
        output_target_num=output_feature_dim,
        past_seq_len=5,
        optimizer='Adam',
        loss=torch.nn.MSELoss(),
        metric="mse",
        hidden_dim=hp.grid_search([32, 64]),
        layer_num=hp.randint(1, 3),
        lr=hp.choice([0.001, 0.003, 0.01]),
        dropout=hp.uniform(0.1, 0.2),
        logs_dir="/tmp/auto_lstm",
        cpus_per_trial=2,
        name="auto_lstm",
    )
    return AutoLSTM(**estimator_kwargs)
示例#14
0
 def _gen_sample_func(self, ranges, param_name):
     if isinstance(ranges, tuple):
         assert len(ranges) == 2, \
             f"length of tuple {param_name} should be 2 while get {len(ranges)} instead."
         assert param_name != "teacher_forcing", \
             f"type of {param_name} can only be a list while get a tuple"
         if param_name in ["lr"]:
             return hp.loguniform(lower=ranges[0], upper=ranges[1])
         if param_name in [
                 "lstm_hidden_dim", "lstm_layer_num", "batch_size"
         ]:
             return hp.randint(lower=ranges[0], upper=ranges[1])
         if param_name in ["dropout"]:
             return hp.uniform(lower=ranges[0], upper=ranges[1])
     if isinstance(ranges, list):
         return hp.grid_search(ranges)
     raise RuntimeError(f"{param_name} should be either a list or a tuple.")
示例#15
0
def get_auto_estimator():
    """
    Build the AutoSeq2Seq estimator used by the tests.

    :return: an AutoSeq2Seq configured with a small search space
    """
    estimator_kwargs = dict(
        input_feature_num=input_feature_dim,
        output_target_num=output_feature_dim,
        past_seq_len=past_seq_len,
        future_seq_len=future_seq_len,
        optimizer='Adam',
        loss=torch.nn.MSELoss(),
        metric="mse",
        lr=hp.choice([0.001, 0.003, 0.01]),
        lstm_hidden_dim=hp.grid_search([32, 64, 128]),
        lstm_layer_num=hp.randint(1, 4),
        dropout=hp.uniform(0.1, 0.3),
        teacher_forcing=False,
        logs_dir="/tmp/auto_seq2seq",
        cpus_per_trial=2,
        name="auto_seq2seq",
    )
    return AutoSeq2Seq(**estimator_kwargs)
 def test_fit_data_creator(self):
     """
     Fit a torch AutoEstimator from dataloader creators and verify that
     every searched key shows up in the best configuration.
     """
     factory_kwargs = dict(
         model_creator=model_creator,
         optimizer=get_optimizer,
         loss=nn.BCELoss(),
         logs_dir="/tmp/zoo_automl_logs",
         resources_per_trial={"cpu": 2},
         name="test_fit",
     )
     estimator = AutoEstimator.from_torch(**factory_kwargs)

     # extend the linear search space with a grid over shuffling
     space = create_linear_search_space()
     space.update(shuffle=hp.grid_search([True, False]))

     estimator.fit(data=train_dataloader_creator,
                   validation_data=valid_dataloader_creator,
                   search_space=space,
                   n_sampling=2,
                   epochs=1,
                   metric="accuracy")

     assert estimator.get_best_model()
     chosen = estimator.get_best_config()
     missing_keys = [key for key in space.keys()
                     if key not in chosen.keys()]
     assert not missing_keys
 def test_fit_data_creator(self):
     """
     Fit a torch AutoEstimator from dataloader creators and verify the
     optimizer and loss carried by the best model.
     """
     factory_kwargs = dict(
         model_creator=model_creator,
         optimizer=get_optimizer,
         loss=nn.BCELoss(),
         logs_dir="/tmp/zoo_automl_logs",
         resources_per_trial={"cpu": 2},
         name="test_fit",
     )
     estimator = AutoEstimator.from_torch(**factory_kwargs)

     # extend the linear search space with a grid over shuffling
     space = create_linear_search_space()
     space.update(shuffle=hp.grid_search([True, False]))

     estimator.fit(data=train_dataloader_creator,
                   validation_data=valid_dataloader_creator,
                   search_space=space,
                   n_sampling=4,
                   epochs=1,
                   metric="accuracy")

     winner = estimator.get_best_model()
     optimizer_name = winner.optimizer.__class__.__name__
     assert optimizer_name == "SGD"
     assert isinstance(winner.loss_creator, nn.BCELoss)
    def test_select_feature(self):
        """
        Fit an AutoTSEstimator with selected_features="auto" on a random
        single-id series and check that the fixed past_seq_len is kept.
        """
        n_rows = np.random.randint(100, 200)

        # columns are generated in the same order as the original literal
        timestamps = pd.date_range('1/1/2019', periods=n_rows)
        values = np.random.randn(n_rows)
        ids = np.array(['00'] * n_rows)
        frame = pd.DataFrame({
            "datetime": timestamps,
            "value": values,
            "id": ids,
        })

        split = TSDataset.from_pandas(frame,
                                      target_col=['value'],
                                      dt_col='datetime',
                                      id_col='id',
                                      with_split=True,
                                      val_ratio=0.1)
        train_ts, val_ts, _ = split

        lstm_space = {}
        lstm_space['hidden_dim'] = hp.grid_search([32, 64])
        lstm_space['layer_num'] = hp.randint(1, 3)
        lstm_space['lr'] = hp.choice([0.001, 0.003, 0.01])
        lstm_space['dropout'] = hp.uniform(0.1, 0.2)

        n_features = 1
        n_targets = 1
        estimator = AutoTSEstimator(
            model='lstm',
            search_space=lstm_space,
            past_seq_len=6,
            future_seq_len=1,
            input_feature_num=n_features,
            output_target_num=n_targets,
            selected_features="auto",
            metric="mse",
            loss=torch.nn.MSELoss(),
            cpus_per_trial=2,
            name="auto_trainer",
        )

        batch_candidates = hp.choice([32, 64])
        estimator.fit(data=train_ts,
                      epochs=1,
                      batch_size=batch_candidates,
                      validation_data=val_ts,
                      n_sampling=1)

        best = estimator.get_best_config()
        assert best['past_seq_len'] == 6
示例#19
0
    def __init__(self,
                 num_rand_samples=1,
                 epochs=5,
                 training_iteration=10,
                 time_step=[3, 4],
                 long_num=[3, 4],
                 cnn_height=[2, 3],
                 cnn_hid_size=[32, 50, 100],
                 ar_size=[2, 3],
                 batch_size=[32, 64]):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param epochs: no. of epochs to train in each iteration
        :param time_step: random search candidates for model param "time_step"
        :param long_num: random search candidates for model param "long_num"
        :param ar_size: random search candidates for model param "ar_size"
        :param batch_size: grid search candidates for batch size
        :param cnn_height: random search candidates for model param "cnn_height"
        :param cnn_hid_size: random search candidates for model param "cnn_hid_size"

        The learning rate (uniform in [0.001, 0.01]) and both dropout rates
        (uniform in [0.2, 0.5]) use fixed ranges that are not configurable
        here.
        """
        # Zero-argument super() is bound to the defining class.
        # super(self.__class__, self) recurses forever as soon as this
        # class is subclassed, because self.__class__ is then the subclass.
        super().__init__()
        # -- run time params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- optimization params
        self.lr = hp.uniform(0.001, 0.01)
        self.batch_size = hp.grid_search(batch_size)
        self.epochs = epochs

        # ---- model params
        self.cnn_dropout = hp.uniform(0.2, 0.5)
        self.rnn_dropout = hp.uniform(0.2, 0.5)
        self.time_step = hp.choice(time_step)
        self.long_num = hp.choice(long_num)
        self.cnn_height = hp.choice(cnn_height)
        self.cnn_hid_size = hp.choice(cnn_hid_size)
        self.ar_size = hp.choice(ar_size)
        # past_seq_len is not searched on its own: it is derived from the
        # long_num and time_step values sampled for the same configuration.
        self.past_seq_len = hp.sample_from(
            lambda spec: (spec.config.long_num + 1) * spec.config.time_step)
    # Start the orca context from the parsed command line arguments;
    # init_ray_on_spark=True is passed because the search below runs
    # through AutoLSTM.
    # NOTE(review): the enclosing function/guard header is not visible in
    # this fragment; `args` and `num_nodes` are defined outside it.
    init_orca_context(cluster_mode=args.cluster_mode,
                      cores=args.cores,
                      memory=args.memory,
                      num_nodes=num_nodes,
                      init_ray_on_spark=True)

    # Load the train/valid/test datasets from the given data directory.
    tsdata_train, tsdata_valid, tsdata_test = get_nyc_taxi_tsdataset(
        args.datadir)

    # Single input feature / single target LSTM with a fixed lookback of 14;
    # hidden_dim, layer_num, lr and dropout are searched.
    auto_lstm = AutoLSTM(input_feature_num=1,
                         output_target_num=1,
                         past_seq_len=14,
                         optimizer='Adam',
                         loss=torch.nn.MSELoss(),
                         metric="mse",
                         hidden_dim=hp.grid_search([32, 64]),
                         layer_num=hp.randint(1, 3),
                         lr=hp.choice([0.001, 0.003, 0.01]),
                         dropout=hp.uniform(0.1, 0.2),
                         logs_dir="/tmp/auto_lstm",
                         cpus_per_trial=args.cpus_per_trial,
                         name="auto_lstm")
    # Run the search; batch size is searched too, while epochs and the
    # number of samples come from the command line.
    auto_lstm.fit(
        data=get_data_creator(tsdata_train),
        epochs=args.epoch,
        batch_size=hp.choice([32, 64]),
        validation_data=get_data_creator(tsdata_valid),
        n_sampling=args.n_sampling,
    )
    # Retrieve the best model and its configuration found by the search.
    best_model = auto_lstm.get_best_model()
    best_config = auto_lstm.get_best_config()
    def test_fit_tcn_feature(self):
        """
        Fit an AutoTSTrainer with the built-in 'tcn' model on scaled
        TSDataset objects, then check that the returned TSPipeline predicts
        the same values as the raw best model and survives a save/load
        round trip.
        """
        input_feature_dim = 11  # This param will not be used
        output_feature_dim = 2  # 2 targets are generated in get_tsdataset

        from sklearn.preprocessing import StandardScaler
        # the scaler is fitted on the train set (fit=True) and the same
        # scaler object is reused for the validation set (fit=False)
        scaler = StandardScaler()
        tsdata_train = get_tsdataset().gen_dt_feature().scale(scaler, fit=True)
        tsdata_valid = get_tsdataset().gen_dt_feature().scale(scaler,
                                                              fit=False)

        search_space = {
            'hidden_units': hp.grid_search([32, 64]),
            'levels': hp.randint(4, 6),
            'kernel_size': hp.randint(3, 5),
            'dropout': hp.uniform(0.1, 0.2),
            'lr': hp.loguniform(0.001, 0.01)
        }
        auto_trainer = AutoTSTrainer(model='tcn',
                                     search_space=search_space,
                                     past_seq_len=hp.randint(4, 6),
                                     future_seq_len=1,
                                     input_feature_num=input_feature_dim,
                                     output_target_num=output_feature_dim,
                                     selected_features="auto",
                                     metric="mse",
                                     optimizer="Adam",
                                     loss=torch.nn.MSELoss(),
                                     logs_dir="/tmp/auto_trainer",
                                     cpus_per_trial=2,
                                     name="auto_trainer")
        ts_pipeline = auto_trainer.fit(data=tsdata_train,
                                       epochs=1,
                                       batch_size=hp.choice([32, 64]),
                                       validation_data=tsdata_valid,
                                       n_sampling=1)
        best_config = auto_trainer.get_best_config()
        best_model = auto_trainer.get_best_model()
        assert 4 <= best_config["past_seq_len"] <= 6

        assert isinstance(ts_pipeline, TSPipeline)

        # use raw base model to predict and evaluate
        tsdata_valid.roll(lookback=best_config["past_seq_len"],
                          horizon=0,
                          feature_col=best_config["selected_features"])
        x_valid, y_valid = tsdata_valid.to_numpy()
        y_pred_raw = best_model.predict(x_valid)
        y_pred_raw = tsdata_valid.unscale_numpy(y_pred_raw)

        # use tspipeline to predict and evaluate
        eval_result = ts_pipeline.evaluate(tsdata_valid)
        y_pred = ts_pipeline.predict(tsdata_valid)

        # check if they are the same
        np.testing.assert_almost_equal(y_pred, y_pred_raw)

        # save and load
        ts_pipeline.save("/tmp/auto_trainer/autots_tmp_model_tcn")
        new_ts_pipeline = TSPipeline.load(
            "/tmp/auto_trainer/autots_tmp_model_tcn")

        # check if the loaded pipeline is the same as the previous one
        eval_result_new = new_ts_pipeline.evaluate(tsdata_valid)
        y_pred_new = new_ts_pipeline.predict(tsdata_valid)
        np.testing.assert_almost_equal(eval_result[0], eval_result_new[0])
        np.testing.assert_almost_equal(y_pred, y_pred_new)

        # use tspipeline to incrementally train
        new_ts_pipeline.fit(tsdata_valid)
示例#22
0
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import zoo.orca.automl.hp as hp


# Names of the supported built-in auto models.
# NOTE(review): presumably these match the top-level keys of
# AUTO_MODEL_DEFAULT_SEARCH_SPACE defined below — confirm.
AUTO_MODEL_SUPPORT_LIST = ["lstm", "tcn", "seq2seq"]

AUTO_MODEL_DEFAULT_SEARCH_SPACE = {
    "lstm": {"minimal": {"hidden_dim": hp.grid_search([16, 32]),
                         "layer_num": hp.randint(1, 2),
                         "lr": hp.loguniform(0.001, 0.005),
                         "dropout": hp.uniform(0.1, 0.2)},
             "normal": {"hidden_dim": hp.grid_search([16, 32, 64]),
                        "layer_num": hp.grid_search([1, 2]),
                        "lr": hp.loguniform(0.0005, 0.01),
                        "dropout": hp.uniform(0, 0.2)},
             "large": {"hidden_dim": hp.grid_search([16, 32, 64, 128]),
                       "layer_num": hp.grid_search([1, 2, 3, 4]),
                       "lr": hp.loguniform(0.0005, 0.01),
                       "dropout": hp.uniform(0, 0.3)}},

    "tcn": {"minimal": {"hidden_units": hp.grid_search([16, 32]),
                        "levels": hp.randint(4, 6),
                        "kernel_size": 3,