Example #1
    def __init__(
            self,
            num_rand_samples=10,
            n_estimators_range=(50, 1000),
            max_depth_range=(2, 15),
            lr=(1e-4, 1e-1),
            min_child_weight=[1, 2, 3],
    ):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled
          randomly
        :param n_estimators_range: range of number of gradient boosted trees.
        :param max_depth_range: range of max tree depth
        :param lr: learning rate
        :param min_child_weight: minimum sum of instance weight (hessian)
          needed in a child.
        """
        super(self.__class__, self).__init__()

        self.num_samples = num_rand_samples

        self.n_estimators_range = n_estimators_range
        self.max_depth_range = max_depth_range
        self.lr = hp.loguniform(lr[0], lr[1])
        self.min_child_weight = hp.choice(min_child_weight)
Example #2
    def __init__(self,
                 num_rand_samples=1,
                 n_estimators=[8, 15],
                 max_depth=[10, 15],
                 n_jobs=-1,
                 tree_method='hist',
                 random_state=2,
                 seed=0,
                 lr=(1e-4, 1e-1),
                 subsample=0.8,
                 colsample_bytree=0.8,
                 min_child_weight=[1, 2, 3],
                 gamma=0,
                 reg_alpha=0,
                 reg_lambda=1):
        """
        """
        super(self.__class__, self).__init__()

        self.num_samples = num_rand_samples
        self.n_jobs = n_jobs
        self.tree_method = tree_method
        self.random_state = random_state
        self.seed = seed

        self.colsample_bytree = colsample_bytree
        self.gamma = gamma
        self.reg_alpha = reg_alpha
        self.reg_lambda = reg_lambda

        self.n_estimators = hp.grid_search(n_estimators)
        self.max_depth = hp.grid_search(max_depth)
        self.lr = hp.loguniform(lr[0], lr[-1])
        self.subsample = subsample
        self.min_child_weight = hp.choice(min_child_weight)
Example #3
def create_XGB_recipe():
    from zoo.orca.automl import hp
    return {
        "n_estimators": hp.randint(5, 10),
        "max_depth": hp.randint(2, 5),
        "lr": hp.loguniform(1e-4, 1e-1),
    }
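For orientation, the hp helpers used in these snippets mirror Ray Tune's search-space API (that zoo.orca.automl.hp wraps Ray Tune is my assumption here, not something the snippets show). A minimal sketch of an equivalent space written directly in Ray Tune, plus drawing one random configuration from it:

from ray import tune

# an equivalent search space using Ray Tune's own samplers; the
# signatures line up with the hp calls in the recipe above
space = {
    "n_estimators": tune.randint(5, 10),   # integer in [5, 10); Ray's upper bound is exclusive
    "max_depth": tune.randint(2, 5),
    "lr": tune.loguniform(1e-4, 1e-1),     # log-uniform over [1e-4, 1e-1]
}

# draw one random configuration; grid_search entries would need separate
# handling, since they are enumerated rather than sampled
config = {name: domain.sample() for name, domain in space.items()}
print(config)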
Example #4
    def __init__(self,
                 num_rand_samples=1,
                 n_estimators=[8, 15],
                 max_depth=[10, 15],
                 n_jobs=-1,
                 tree_method='hist',
                 random_state=2,
                 seed=0,
                 lr=(1e-4, 1e-1),
                 subsample=0.8,
                 colsample_bytree=0.8,
                 min_child_weight=[1, 2, 3],
                 gamma=0,
                 reg_alpha=0,
                 reg_lambda=1):
        """
        Constructor. For XGBoost hyperparameters, refer to
        https://xgboost.readthedocs.io/en/latest/python/python_api.html for
        details.

        :param num_rand_samples: number of hyper-param configurations sampled
          randomly
        :param n_estimators: number of gradient boosted trees.
        :param max_depth: max tree depth
        :param n_jobs: number of parallel threads used to run xgboost.
        :param tree_method: specify which tree method to use.
        :param random_state: random number seed.
        :param seed: seed used to generate the folds
        :param lr: learning rate
        :param subsample: subsample ratio of the training instance
        :param colsample_bytree: subsample ratio of columns when constructing
          each tree.
        :param min_child_weight: minimum sum of instance weight (hessian)
          needed in a child.
        :param gamma: minimum loss reduction required to make a further
          partition on a leaf node of the tree.
        :param reg_alpha: L1 regularization term on weights (xgb’s alpha).
        :param reg_lambda: L2 regularization term on weights (xgb’s lambda).

        """
        super(self.__class__, self).__init__()

        self.num_samples = num_rand_samples
        self.n_jobs = n_jobs
        self.tree_method = tree_method
        self.random_state = random_state
        self.seed = seed

        self.colsample_bytree = colsample_bytree
        self.gamma = gamma
        self.reg_alpha = reg_alpha
        self.reg_lambda = reg_lambda

        self.n_estimators = hp.grid_search(n_estimators)
        self.max_depth = hp.grid_search(max_depth)
        self.lr = hp.loguniform(lr[0], lr[-1])
        self.subsample = subsample
        self.min_child_weight = hp.choice(min_child_weight)
Example #5
    def test_auto_prophet_save_load(self):
        data, expect_horizon = get_data()
        auto_prophet = AutoProphet(metric="mse",
                                   changepoint_prior_scale=hp.loguniform(0.001, 0.5),
                                   seasonality_prior_scale=hp.loguniform(0.01, 10),
                                   holidays_prior_scale=hp.loguniform(0.01, 10),
                                   seasonality_mode=hp.choice(['additive', 'multiplicative']),
                                   changepoint_range=hp.uniform(0.8, 0.95)
                                   )

        auto_prophet.fit(data=data,
                         expect_horizon=expect_horizon,
                         n_sampling=1,
                         )
        with tempfile.TemporaryDirectory() as tmp_dir_name:
            ckpt_name = os.path.join(tmp_dir_name, "json")
            auto_prophet.save(ckpt_name)
            auto_prophet.restore(ckpt_name)
Example #6
    def test_auto_prophet_fit(self):
        data, expect_horizon = get_data()
        auto_prophet = AutoProphet(metric="mse",
                                   changepoint_prior_scale=hp.loguniform(0.001, 0.5),
                                   seasonality_prior_scale=hp.loguniform(0.01, 10),
                                   holidays_prior_scale=hp.loguniform(0.01, 10),
                                   seasonality_mode=hp.choice(['additive', 'multiplicative']),
                                   changepoint_range=hp.uniform(0.8, 0.95)
                                   )

        auto_prophet.fit(data=data,
                         expect_horizon=expect_horizon,
                         n_sampling=1,
                         )
        best_model = auto_prophet.get_best_model()
        assert 0.001 <= best_model.changepoint_prior_scale <= 0.5
        assert 0.01 <= best_model.seasonality_prior_scale <= 10
        assert 0.01 <= best_model.holidays_prior_scale <= 10
        assert best_model.seasonality_mode in ['additive', 'multiplicative']
        assert 0.8 <= best_model.changepoint_range <= 0.95
Example #7
    def test_auto_prophet_predict_evaluate(self):
        data, expect_horizon = get_data()
        auto_prophet = AutoProphet(metric="mse",
                                   changepoint_prior_scale=hp.loguniform(0.001, 0.5),
                                   seasonality_prior_scale=hp.loguniform(0.01, 10),
                                   holidays_prior_scale=hp.loguniform(0.01, 10),
                                   seasonality_mode=hp.choice(['additive', 'multiplicative']),
                                   changepoint_range=hp.uniform(0.8, 0.95)
                                   )

        auto_prophet.fit(data=data,
                         cross_validation=False,
                         expect_horizon=expect_horizon,
                         n_sampling=1,
                         )

        auto_prophet.predict(horizon=1, freq="D")
        test_data = pd.DataFrame(pd.date_range('20150101', periods=10),
                                 columns=['ds'])
        test_data.insert(1, 'y', np.random.rand(10))
        auto_prophet.evaluate(test_data)
Example #8
    def test_fit_tcn_feature(self):
        input_feature_dim = 11  # This param will not be used
        output_feature_dim = 2  # 2 targets are generated in get_tsdataset

        tsdata_train = get_tsdataset().gen_dt_feature()
        tsdata_valid = get_tsdataset().gen_dt_feature()
        tsdata_test = get_tsdataset().gen_dt_feature()

        search_space = {
            'hidden_units': hp.grid_search([32, 64]),
            'levels': hp.randint(4, 6),
            'kernel_size': hp.randint(3, 5),
            'dropout': hp.uniform(0.1, 0.2),
            'lr': hp.loguniform(0.001, 0.01)
        }
        auto_trainer = AutoTSTrainer(model='tcn',
                                     search_space=search_space,
                                     past_seq_len=hp.randint(4, 6),
                                     future_seq_len=1,
                                     input_feature_num=input_feature_dim,
                                     output_target_num=output_feature_dim,
                                     selected_features="auto",
                                     metric="mse",
                                     optimizer="Adam",
                                     loss=torch.nn.MSELoss(),
                                     logs_dir="/tmp/auto_trainer",
                                     cpus_per_trial=2,
                                     name="auto_trainer")
        auto_trainer.fit(data=tsdata_train,
                         epochs=1,
                         batch_size=hp.choice([32, 64]),
                         validation_data=tsdata_valid,
                         n_sampling=1)
        best_config = auto_trainer.get_best_config()
        best_model = auto_trainer.get_best_model()
        assert 4 <= best_config["past_seq_len"] <= 6

        # really difficult to use the model currently...
        tsdata_test.roll(lookback=best_config["past_seq_len"],
                         horizon=1,
                         feature_col=best_config["selected_features"])
        x_test, y_test = tsdata_test.to_numpy()
        y_pred = best_model.predict(x_test)
        best_model.save("best.ckpt")
        from zoo.automl.model.base_pytorch_model import PytorchModelBuilder
        restore_model = PytorchModelBuilder(
            model_creator=best_model.model_creator,
            optimizer_creator="Adam",
            loss_creator=torch.nn.MSELoss()).build(best_config)
        restore_model.restore("best.ckpt")
        y_pred_restore = restore_model.predict(x_test)
        np.testing.assert_almost_equal(y_pred, y_pred_restore)
Example #9
 def test_fit(self):
     auto_prophet = AutoProphet()
     data = get_data()
     search_space = {
         "changepoint_prior_scale": hp.loguniform(0.001, 0.5),
         "seasonality_prior_scale": hp.loguniform(0.01, 10),
         "holidays_prior_scale": hp.loguniform(0.01, 10),
         "seasonality_mode": hp.choice(['additive', 'multiplicative']),
         "changepoint_range": hp.uniform(0.8, 0.95)
     }
     auto_prophet.fit(data=data,
                      epochs=1,
                      metric="mse",
                      n_sampling=10,
                      search_space=search_space,
                      )
     best_model = auto_prophet.get_best_model()
     assert 0.001 <= best_model.model.changepoint_prior_scale <= 0.5
     assert 0.01 <= best_model.model.seasonality_prior_scale <= 10
     assert 0.01 <= best_model.model.holidays_prior_scale <= 10
     assert best_model.model.seasonality_mode in ['additive', 'multiplicative']
     assert 0.8 <= best_model.model.changepoint_range <= 0.95
Example #10
 def _gen_sample_func(self, ranges, param_name):
     if isinstance(ranges, tuple):
         assert len(ranges) == 2, \
             f"length of tuple {param_name} should be 2 while get {len(ranges)} instead."
         assert param_name != "teacher_forcing", \
             f"type of {param_name} can only be a list while get a tuple"
         if param_name in ["lr"]:
             return hp.loguniform(lower=ranges[0], upper=ranges[1])
         if param_name in [
                 "lstm_hidden_dim", "lstm_layer_num", "batch_size"
         ]:
             return hp.randint(lower=ranges[0], upper=ranges[1])
         if param_name in ["dropout"]:
             return hp.uniform(lower=ranges[0], upper=ranges[1])
     if isinstance(ranges, list):
         return hp.grid_search(ranges)
     raise RuntimeError(f"{param_name} should be either a list or a tuple.")
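This helper spells out the convention the recipes above rely on: a 2-tuple means "sample from this range" (log-uniform for lr, integers for size-like parameters, uniform for dropout), while a list means "grid-search exactly these values". A short illustration with the same hp helpers; the concrete bounds are made up for the example:

# tuple -> a sampled range; the distribution depends on the parameter
lr_space = hp.loguniform(lower=1e-4, upper=1e-1)   # lr: log-uniform sampling
hidden_space = hp.randint(lower=32, upper=128)     # size-like params: randint
dropout_space = hp.uniform(lower=0.1, upper=0.3)   # dropout: uniform sampling

# list -> exhaustive grid search over the listed values
batch_space = hp.grid_search([32, 64, 128])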
Example #11
    def __init__(
            self,
            num_rand_samples=10,
            n_estimators_range=(50, 1000),
            max_depth_range=(2, 15),
            lr=(1e-4, 1e-1),
            min_child_weight=[1, 2, 3],
    ):
        """
        """
        super(self.__class__, self).__init__()

        self.num_samples = num_rand_samples

        self.n_estimators_range = n_estimators_range
        self.max_depth_range = max_depth_range
        self.lr = hp.loguniform(lr[0], lr[1])
        self.min_child_weight = hp.choice(min_child_weight)
Example #12
    def __init__(self,
                 changepoint_prior_scale=hp.loguniform(0.001, 0.5),
                 seasonality_prior_scale=hp.loguniform(0.01, 10),
                 holidays_prior_scale=hp.loguniform(0.01, 10),
                 seasonality_mode=hp.choice(['additive', 'multiplicative']),
                 changepoint_range=hp.uniform(0.8, 0.95),
                 metric='mse',
                 logs_dir="/tmp/auto_prophet_logs",
                 cpus_per_trial=1,
                 name="auto_prophet",
                 remote_dir=None,
                 **prophet_config
                 ):
        """
        Create an automated Prophet Model.
        Users need to specify either the exact value or the search space of the
        Prophet model hyperparameters. For details of the Prophet model hyperparameters, refer to
        https://facebook.github.io/prophet/docs/diagnostics.html#hyperparameter-tuning.

        :param changepoint_prior_scale: Float or hp sampling function from a float space
            for hyperparameter changepoint_prior_scale for the Prophet model.
            For hp sampling, see zoo.orca.automl.hp for more details.
            e.g. hp.loguniform(0.001, 0.5).
        :param seasonality_prior_scale: hyperparameter seasonality_prior_scale for the
            Prophet model.
            e.g. hp.loguniform(0.01, 10).
        :param holidays_prior_scale: hyperparameter holidays_prior_scale for the
            Prophet model.
            e.g. hp.loguniform(0.01, 10).
        :param seasonality_mode: hyperparameter seasonality_mode for the
            Prophet model.
            e.g. hp.choice(['additive', 'multiplicative']).
        :param changepoint_range: hyperparameter changepoint_range for the
            Prophet model.
            e.g. hp.uniform(0.8, 0.95).
        :param metric: String. The evaluation metric name to optimize. e.g. "mse"
        :param logs_dir: Local directory to save logs and results. It defaults to
            "/tmp/auto_prophet_logs"
        :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
        :param name: name of the AutoProphet. It defaults to "auto_prophet"
        :param remote_dir: String. Remote directory to sync training results and checkpoints. It
            defaults to None and doesn't take effect when running locally. When running on a
            cluster, it defaults to "hdfs:///tmp/{name}".

        :param prophet_config: Other Prophet hyperparameters.
        """
        self.search_space = {
            "changepoint_prior_scale": changepoint_prior_scale,
            "seasonality_prior_scale": seasonality_prior_scale,
            "holidays_prior_scale": holidays_prior_scale,
            "seasonality_mode": seasonality_mode,
            "changepoint_range": changepoint_range
        }
        self.search_space.update(prophet_config)  # update other configs
        self.metric = metric
        model_builder = ProphetBuilder()
        self.auto_est = AutoEstimator(model_builder=model_builder,
                                      logs_dir=logs_dir,
                                      resources_per_trial={"cpu": cpus_per_trial},
                                      remote_dir=remote_dir,
                                      name=name)
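A minimal usage sketch for this class, mirroring the calls in Examples #5-#7; the toy DataFrame (Prophet's ds/y column format) and the horizon values are assumptions for illustration:

import numpy as np
import pandas as pd

# toy data in Prophet's expected ds/y format
df = pd.DataFrame({"ds": pd.date_range("20200101", periods=100),
                   "y": np.random.rand(100)})

auto_prophet = AutoProphet(metric="mse")   # the defaults above define the search space
auto_prophet.fit(data=df, expect_horizon=10, n_sampling=1)
best_model = auto_prophet.get_best_model()
forecast = auto_prophet.predict(horizon=10, freq="D")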
Example #13
def get_xgb_search_space():
    return {
        "n_estimators": hp.randint(5, 10),
        "max_depth": hp.randint(2, 5),
        "lr": hp.loguniform(1e-4, 1e-1),
    }
Example #14
    def test_fit_tcn_feature(self):
        input_feature_dim = 11  # This param will not be used
        output_feature_dim = 2  # 2 targets are generated in get_tsdataset

        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        tsdata_train = get_tsdataset().gen_dt_feature().scale(scaler, fit=True)
        tsdata_valid = get_tsdataset().gen_dt_feature().scale(scaler,
                                                              fit=False)

        search_space = {
            'hidden_units': hp.grid_search([32, 64]),
            'levels': hp.randint(4, 6),
            'kernel_size': hp.randint(3, 5),
            'dropout': hp.uniform(0.1, 0.2),
            'lr': hp.loguniform(0.001, 0.01)
        }
        auto_trainer = AutoTSTrainer(model='tcn',
                                     search_space=search_space,
                                     past_seq_len=hp.randint(4, 6),
                                     future_seq_len=1,
                                     input_feature_num=input_feature_dim,
                                     output_target_num=output_feature_dim,
                                     selected_features="auto",
                                     metric="mse",
                                     optimizer="Adam",
                                     loss=torch.nn.MSELoss(),
                                     logs_dir="/tmp/auto_trainer",
                                     cpus_per_trial=2,
                                     name="auto_trainer")
        ts_pipeline = auto_trainer.fit(data=tsdata_train,
                                       epochs=1,
                                       batch_size=hp.choice([32, 64]),
                                       validation_data=tsdata_valid,
                                       n_sampling=1)
        best_config = auto_trainer.get_best_config()
        best_model = auto_trainer.get_best_model()
        assert 4 <= best_config["past_seq_len"] <= 6

        assert isinstance(ts_pipeline, TSPipeline)

        # use the raw base model to predict and evaluate
        tsdata_valid.roll(lookback=best_config["past_seq_len"],
                          horizon=0,
                          feature_col=best_config["selected_features"])
        x_valid, y_valid = tsdata_valid.to_numpy()
        y_pred_raw = best_model.predict(x_valid)
        y_pred_raw = tsdata_valid.unscale_numpy(y_pred_raw)

        # use ts_pipeline to predict and evaluate
        eval_result = ts_pipeline.evaluate(tsdata_valid)
        y_pred = ts_pipeline.predict(tsdata_valid)

        # check if they are the same
        np.testing.assert_almost_equal(y_pred, y_pred_raw)

        # save and load
        ts_pipeline.save("/tmp/auto_trainer/autots_tmp_model_tcn")
        new_ts_pipeline = TSPipeline.load(
            "/tmp/auto_trainer/autots_tmp_model_tcn")

        # check that the loaded pipeline gives the same results as before
        eval_result_new = new_ts_pipeline.evaluate(tsdata_valid)
        y_pred_new = new_ts_pipeline.predict(tsdata_valid)
        np.testing.assert_almost_equal(eval_result[0], eval_result_new[0])
        np.testing.assert_almost_equal(y_pred, y_pred_new)

        # use the loaded ts_pipeline to train incrementally
        new_ts_pipeline.fit(tsdata_valid)
Example #15
import zoo.orca.automl.hp as hp


AUTO_MODEL_SUPPORT_LIST = ["lstm", "tcn", "seq2seq"]

AUTO_MODEL_DEFAULT_SEARCH_SPACE = {
    "lstm": {"minimal": {"hidden_dim": hp.grid_search([16, 32]),
                         "layer_num": hp.randint(1, 2),
                         "lr": hp.loguniform(0.001, 0.005),
                         "dropout": hp.uniform(0.1, 0.2)},
             "normal": {"hidden_dim": hp.grid_search([16, 32, 64]),
                        "layer_num": hp.grid_search([1, 2]),
                        "lr": hp.loguniform(0.0005, 0.01),
                        "dropout": hp.uniform(0, 0.2)},
             "large": {"hidden_dim": hp.grid_search([16, 32, 64, 128]),
                       "layer_num": hp.grid_search([1, 2, 3, 4]),
                       "lr": hp.loguniform(0.0005, 0.01),
                       "dropout": hp.uniform(0, 0.3)}},

    "tcn": {"minimal": {"hidden_units": hp.grid_search([16, 32]),
                        "levels": hp.randint(4, 6),
                        "kernel_size": 3,
                        "lr": hp.loguniform(0.001, 0.005),
                        "dropout": hp.uniform(0.1, 0.2)},