def __init__(
        self,
        num_rand_samples=10,
        n_estimators_range=(50, 1000),
        max_depth_range=(2, 15),
        lr=(1e-4, 1e-1),
        min_child_weight=[1, 2, 3],
):
    """
    Constructor.

    :param num_rand_samples: number of hyper-param configurations sampled randomly
    :param n_estimators_range: range of number of gradient boosted trees.
        Stored unchanged on the instance.
    :param max_depth_range: range of max tree depth. Stored unchanged on the
        instance.
    :param lr: learning rate range. lr[0] and lr[1] are passed to
        hp.loguniform as its two arguments.
    :param min_child_weight: candidate values for the minimum sum of instance
        weight(hessian) needed in a child. Wrapped in hp.choice.
    """
    # Zero-argument super() always binds to the class that defines this
    # method. super(self.__class__, self) re-enters this very __init__
    # without end as soon as the class is subclassed, because
    # self.__class__ is then the subclass.
    super().__init__()

    self.num_samples = num_rand_samples
    self.n_estimators_range = n_estimators_range
    self.max_depth_range = max_depth_range
    self.lr = hp.loguniform(lr[0], lr[1])
    self.min_child_weight = hp.choice(min_child_weight)
def __init__(self,
             num_rand_samples=1,
             n_estimators=[8, 15],
             max_depth=[10, 15],
             n_jobs=-1,
             tree_method='hist',
             random_state=2,
             seed=0,
             lr=(1e-4, 1e-1),
             subsample=0.8,
             colsample_bytree=0.8,
             min_child_weight=[1, 2, 3],
             gamma=0,
             reg_alpha=0,
             reg_lambda=1):
    """
    Constructor.

    The arguments fall into two groups: values stored on the instance
    unchanged, and values wrapped in an hp sampling function.

    :param num_rand_samples: stored as self.num_samples.
    :param n_estimators: list of candidate values, wrapped in hp.grid_search.
    :param max_depth: list of candidate values, wrapped in hp.grid_search.
    :param n_jobs: stored unchanged.
    :param tree_method: stored unchanged.
    :param random_state: stored unchanged.
    :param seed: stored unchanged.
    :param lr: sequence whose first and last items are passed to
        hp.loguniform as its two arguments.
    :param subsample: stored unchanged.
    :param colsample_bytree: stored unchanged.
    :param min_child_weight: candidate values, wrapped in hp.choice.
    :param gamma: stored unchanged.
    :param reg_alpha: stored unchanged.
    :param reg_lambda: stored unchanged.
    """
    # Zero-argument super() always binds to the class that defines this
    # method. super(self.__class__, self) re-enters this very __init__
    # without end as soon as the class is subclassed.
    super().__init__()

    self.num_samples = num_rand_samples

    # Fixed settings, kept exactly as given.
    self.n_jobs = n_jobs
    self.tree_method = tree_method
    self.random_state = random_state
    self.seed = seed
    self.colsample_bytree = colsample_bytree
    self.gamma = gamma
    self.reg_alpha = reg_alpha
    self.reg_lambda = reg_lambda

    # Searched settings, expressed through hp sampling functions.
    self.n_estimators = hp.grid_search(n_estimators)
    self.max_depth = hp.grid_search(max_depth)
    self.lr = hp.loguniform(lr[0], lr[-1])
    self.subsample = subsample
    self.min_child_weight = hp.choice(min_child_weight)
def create_XGB_recipe():
    """
    Return a search-space dict with the keys "n_estimators", "max_depth"
    and "lr", each mapped to an hp sampling function.
    """
    from zoo.orca.automl import hp

    recipe = dict(
        n_estimators=hp.randint(5, 10),
        max_depth=hp.randint(2, 5),
        lr=hp.loguniform(1e-4, 1e-1),
    )
    return recipe
def __init__(self,
             num_rand_samples=1,
             n_estimators=[8, 15],
             max_depth=[10, 15],
             n_jobs=-1,
             tree_method='hist',
             random_state=2,
             seed=0,
             lr=(1e-4, 1e-1),
             subsample=0.8,
             colsample_bytree=0.8,
             min_child_weight=[1, 2, 3],
             gamma=0,
             reg_alpha=0,
             reg_lambda=1):
    """
    Constructor. For XGBoost hyper parameters, refer to
    https://xgboost.readthedocs.io/en/latest/python/python_api.html for details.

    :param num_rand_samples: number of hyper-param configurations sampled randomly
    :param n_estimators: number of gradient boosted trees. A list of
        candidates, wrapped in hp.grid_search.
    :param max_depth: max tree depth. A list of candidates, wrapped in
        hp.grid_search.
    :param n_jobs: number of parallel threads used to run xgboost.
    :param tree_method: specify which tree method to use.
    :param random_state: random number seed.
    :param seed: seed used to generate the folds
    :param lr: learning rate. The first and last items are passed to
        hp.loguniform as its two arguments.
    :param subsample: subsample ratio of the training instance
    :param colsample_bytree: subsample ratio of columns when constructing each tree.
    :param min_child_weight: minimum sum of instance weight(hessian) needed in a child.
        Candidate values, wrapped in hp.choice.
    :param gamma: minimum loss reduction required to make a further partition on a
        leaf node of the tree.
    :param reg_alpha: L1 regularization term on weights (xgb's alpha).
    :param reg_lambda: L2 regularization term on weights (xgb's lambda).
    """
    # Zero-argument super() always binds to the class that defines this
    # method. super(self.__class__, self) re-enters this very __init__
    # without end as soon as the class is subclassed.
    super().__init__()

    self.num_samples = num_rand_samples

    # Fixed settings, kept exactly as given.
    self.n_jobs = n_jobs
    self.tree_method = tree_method
    self.random_state = random_state
    self.seed = seed
    self.colsample_bytree = colsample_bytree
    self.gamma = gamma
    self.reg_alpha = reg_alpha
    self.reg_lambda = reg_lambda

    # Searched settings, expressed through hp sampling functions.
    self.n_estimators = hp.grid_search(n_estimators)
    self.max_depth = hp.grid_search(max_depth)
    self.lr = hp.loguniform(lr[0], lr[-1])
    self.subsample = subsample
    self.min_child_weight = hp.choice(min_child_weight)
def test_auto_prophet_save_load(self):
    """Fit an AutoProphet once, then save it into and restore it from a temp dir."""
    data, expect_horizon = get_data()

    # Search space handed to the constructor as keyword arguments.
    space = dict(
        changepoint_prior_scale=hp.loguniform(0.001, 0.5),
        seasonality_prior_scale=hp.loguniform(0.01, 10),
        holidays_prior_scale=hp.loguniform(0.01, 10),
        seasonality_mode=hp.choice(['additive', 'multiplicative']),
        changepoint_range=hp.uniform(0.8, 0.95),
    )
    auto_prophet = AutoProphet(metric="mse", **space)
    auto_prophet.fit(data=data, expect_horizon=expect_horizon, n_sampling=1)

    # The temporary directory is removed when the with-block exits.
    with tempfile.TemporaryDirectory() as workdir:
        checkpoint = os.path.join(workdir, "json")
        auto_prophet.save(checkpoint)
        auto_prophet.restore(checkpoint)
def test_auto_prophet_fit(self):
    """Fit an AutoProphet once and check the best model's hyper-parameters lie in the search space."""
    data, expect_horizon = get_data()

    space = dict(
        changepoint_prior_scale=hp.loguniform(0.001, 0.5),
        seasonality_prior_scale=hp.loguniform(0.01, 10),
        holidays_prior_scale=hp.loguniform(0.01, 10),
        seasonality_mode=hp.choice(['additive', 'multiplicative']),
        changepoint_range=hp.uniform(0.8, 0.95),
    )
    auto_prophet = AutoProphet(metric="mse", **space)
    auto_prophet.fit(data=data, expect_horizon=expect_horizon, n_sampling=1)

    winner = auto_prophet.get_best_model()

    # Each sampled value must fall inside the bounds it was drawn from.
    prior_scale_bounds = (
        ("changepoint_prior_scale", 0.001, 0.5),
        ("seasonality_prior_scale", 0.01, 10),
        ("holidays_prior_scale", 0.01, 10),
    )
    for attr_name, low, high in prior_scale_bounds:
        assert low <= getattr(winner, attr_name) <= high
    assert winner.seasonality_mode in ['additive', 'multiplicative']
    assert 0.8 <= winner.changepoint_range <= 0.95
def test_auto_prophet_predict_evaluate(self):
    """Fit without cross validation, then call predict and evaluate on the fitted AutoProphet."""
    data, expect_horizon = get_data()

    space = dict(
        changepoint_prior_scale=hp.loguniform(0.001, 0.5),
        seasonality_prior_scale=hp.loguniform(0.01, 10),
        holidays_prior_scale=hp.loguniform(0.01, 10),
        seasonality_mode=hp.choice(['additive', 'multiplicative']),
        changepoint_range=hp.uniform(0.8, 0.95),
    )
    auto_prophet = AutoProphet(metric="mse", **space)
    auto_prophet.fit(
        data=data,
        cross_validation=False,
        expect_horizon=expect_horizon,
        n_sampling=1,
    )

    auto_prophet.predict(horizon=1, freq="D")

    # Ten daily timestamps in column 'ds' plus random targets in column 'y'.
    timestamps = pd.date_range('20150101', periods=10)
    test_data = pd.DataFrame(timestamps, columns=['ds'])
    test_data.insert(1, 'y', np.random.rand(10))
    auto_prophet.evaluate(test_data)
def test_fit_tcn_feature(self):
    """
    Fit an AutoTSTrainer with a TCN model, then check that the best model
    predicts the same values after a save / rebuild / restore round trip.
    """
    input_feature_dim = 11  # This param will not be used
    output_feature_dim = 2  # 2 targets are generated in get_tsdataset

    tsdata_train = get_tsdataset().gen_dt_feature()
    tsdata_valid = get_tsdataset().gen_dt_feature()
    tsdata_test = get_tsdataset().gen_dt_feature()

    search_space = {
        'hidden_units': hp.grid_search([32, 64]),
        'levels': hp.randint(4, 6),
        'kernel_size': hp.randint(3, 5),
        'dropout': hp.uniform(0.1, 0.2),
        'lr': hp.loguniform(0.001, 0.01)
    }
    auto_trainer = AutoTSTrainer(
        model='tcn',
        search_space=search_space,
        past_seq_len=hp.randint(4, 6),
        future_seq_len=1,
        input_feature_num=input_feature_dim,
        output_target_num=output_feature_dim,
        selected_features="auto",
        metric="mse",
        optimizer="Adam",
        loss=torch.nn.MSELoss(),
        logs_dir="/tmp/auto_trainer",
        cpus_per_trial=2,
        name="auto_trainer",
    )
    auto_trainer.fit(
        data=tsdata_train,
        epochs=1,
        batch_size=hp.choice([32, 64]),
        validation_data=tsdata_valid,
        n_sampling=1,
    )

    best_config = auto_trainer.get_best_config()
    best_model = auto_trainer.get_best_model()
    assert 4 <= best_config["past_seq_len"] <= 6

    # really difficult to use the model currently...
    lookback = best_config["past_seq_len"]
    chosen_features = best_config["selected_features"]
    tsdata_test.roll(lookback=lookback,
                     horizon=1,
                     feature_col=chosen_features)
    x_test, y_test = tsdata_test.to_numpy()
    y_pred = best_model.predict(x_test)

    # Persist the best model, rebuild one from the same config, and compare.
    checkpoint = "best.ckpt"
    best_model.save(checkpoint)
    from zoo.automl.model.base_pytorch_model import PytorchModelBuilder
    builder = PytorchModelBuilder(
        model_creator=best_model.model_creator,
        optimizer_creator="Adam",
        loss_creator=torch.nn.MSELoss())
    restore_model = builder.build(best_config)
    restore_model.restore(checkpoint)
    y_pred_restore = restore_model.predict(x_test)
    np.testing.assert_almost_equal(y_pred, y_pred_restore)
def test_fit(self):
    """
    Fit an AutoProphet over a five-parameter search space and check that
    the best model's hyper-parameters lie inside that space.
    """
    auto_prophet = AutoProphet()
    data = get_data()
    search_space = {
        "changepoint_prior_scale": hp.loguniform(0.001, 0.5),
        "seasonality_prior_scale": hp.loguniform(0.01, 10),
        "holidays_prior_scale": hp.loguniform(0.01, 10),
        "seasonality_mode": hp.choice(['additive', 'multiplicative']),
        "changepoint_range": hp.uniform(0.8, 0.95)
    }
    auto_prophet.fit(data=data,
                     epochs=1,
                     metric="mse",
                     n_sampling=10,
                     search_space=search_space,
                     )
    best_model = auto_prophet.get_best_model()

    # Every hyper-parameter is read from best_model.model. The holidays
    # check used to read best_model.holidays_prior_scale directly, unlike
    # the four checks around it.
    assert 0.001 <= best_model.model.changepoint_prior_scale <= 0.5
    assert 0.01 <= best_model.model.seasonality_prior_scale <= 10
    assert 0.01 <= best_model.model.holidays_prior_scale <= 10
    assert best_model.model.seasonality_mode in ['additive', 'multiplicative']
    assert 0.8 <= best_model.model.changepoint_range <= 0.95
def _gen_sample_func(self, ranges, param_name):
    """
    Turn a user-supplied range into an hp sampling function.

    :param ranges: either a 2-tuple, sampled continuously or as integers
        depending on param_name, or a list of candidate values, which is
        always grid-searched.
    :param param_name: name of the hyper-parameter being configured.
        Tuples are accepted for "lr" (hp.loguniform), "lstm_hidden_dim",
        "lstm_layer_num" and "batch_size" (hp.randint), and "dropout"
        (hp.uniform).
    :return: the hp sampling function built from ranges.
    :raises AssertionError: if a tuple does not have exactly two items, or
        a tuple is given for "teacher_forcing".
    :raises RuntimeError: if ranges is neither a list nor a tuple, or a
        tuple is given for a parameter that has no tuple sampler.
    """
    if isinstance(ranges, tuple):
        assert len(ranges) == 2, \
            f"length of tuple {param_name} should be 2 while get {len(ranges)} instead."
        assert param_name != "teacher_forcing", \
            f"type of {param_name} can only be a list while get a tuple"
        if param_name in ["lr"]:
            return hp.loguniform(lower=ranges[0], upper=ranges[1])
        if param_name in ["lstm_hidden_dim", "lstm_layer_num", "batch_size"]:
            return hp.randint(lower=ranges[0], upper=ranges[1])
        if param_name in ["dropout"]:
            return hp.uniform(lower=ranges[0], upper=ranges[1])
        # A tuple for any other parameter used to fall through to the
        # "should be either a list or a tuple" error, which is wrong for a
        # tuple input. Same exception type, accurate message.
        raise RuntimeError(
            f"{param_name} does not support a tuple range; "
            f"use a list of candidate values instead.")
    if isinstance(ranges, list):
        return hp.grid_search(ranges)
    raise RuntimeError(f"{param_name} should be either a list or a tuple.")
def __init__(
        self,
        num_rand_samples=10,
        n_estimators_range=(50, 1000),
        max_depth_range=(2, 15),
        lr=(1e-4, 1e-1),
        min_child_weight=[1, 2, 3],
):
    """
    Constructor.

    :param num_rand_samples: stored as self.num_samples.
    :param n_estimators_range: stored unchanged on the instance.
    :param max_depth_range: stored unchanged on the instance.
    :param lr: sequence whose items 0 and 1 are passed to hp.loguniform as
        its two arguments.
    :param min_child_weight: candidate values, wrapped in hp.choice.
    """
    # Zero-argument super() always binds to the class that defines this
    # method. super(self.__class__, self) re-enters this very __init__
    # without end as soon as the class is subclassed, because
    # self.__class__ is then the subclass.
    super().__init__()

    self.num_samples = num_rand_samples
    self.n_estimators_range = n_estimators_range
    self.max_depth_range = max_depth_range
    self.lr = hp.loguniform(lr[0], lr[1])
    self.min_child_weight = hp.choice(min_child_weight)
def __init__(self,
             changepoint_prior_scale=hp.loguniform(0.001, 0.5),
             seasonality_prior_scale=hp.loguniform(0.01, 10),
             holidays_prior_scale=hp.loguniform(0.01, 10),
             seasonality_mode=hp.choice(['additive', 'multiplicative']),
             changepoint_range=hp.uniform(0.8, 0.95),
             metric='mse',
             logs_dir="/tmp/auto_prophet_logs",
             cpus_per_trial=1,
             name="auto_prophet",
             remote_dir=None,
             **prophet_config
             ):
    """
    Create an automated Prophet Model.
    User need to specify either the exact value or the search space of the
    Prophet model hyperparameters. For details of the Prophet model hyperparameters, refer to
    https://facebook.github.io/prophet/docs/diagnostics.html#hyperparameter-tuning.

    :param changepoint_prior_scale: exact value or hp sampling function for the
        hyperparameter changepoint_prior_scale of the Prophet model.
        e.g. hp.loguniform(0.001, 0.5).
    :param seasonality_prior_scale: hyperparameter seasonality_prior_scale for the
        Prophet model.
        e.g. hp.loguniform(0.01, 10).
    :param holidays_prior_scale: hyperparameter holidays_prior_scale for the
        Prophet model.
        e.g. hp.loguniform(0.01, 10).
    :param seasonality_mode: hyperparameter seasonality_mode for the
        Prophet model.
        e.g. hp.choice(['additive', 'multiplicative']).
    :param changepoint_range: hyperparameter changepoint_range for the
        Prophet model.
        e.g. hp.uniform(0.8, 0.95).
    :param metric: String. The evaluation metric name to optimize. e.g. "mse"
    :param logs_dir: Local directory to save logs and results. It defaults to
        "/tmp/auto_prophet_logs"
    :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
    :param name: name of the AutoProphet. It defaults to "auto_prophet"
    :param remote_dir: String. Remote directory to sync training results and checkpoints. It
        defaults to None and doesn't take effects while running in local. While running in
        cluster, it defaults to "hdfs:///tmp/{name}".
    :param prophet_config: Other Prophet hyperparameters. They are merged into the
        search space and override the five named hyperparameters on key collision.
    """
    named_hyperparams = dict(
        changepoint_prior_scale=changepoint_prior_scale,
        seasonality_prior_scale=seasonality_prior_scale,
        holidays_prior_scale=holidays_prior_scale,
        seasonality_mode=seasonality_mode,
        changepoint_range=changepoint_range,
    )
    # Extra keyword arguments go last so they win over the named ones.
    self.search_space = {**named_hyperparams, **prophet_config}
    self.metric = metric

    model_builder = ProphetBuilder()
    trial_resources = {"cpu": cpus_per_trial}
    self.auto_est = AutoEstimator(model_builder=model_builder,
                                  logs_dir=logs_dir,
                                  resources_per_trial=trial_resources,
                                  remote_dir=remote_dir,
                                  name=name)
def get_xgb_search_space():
    """
    Return a search-space dict with the keys "n_estimators", "max_depth"
    and "lr", each mapped to an hp sampling function.
    """
    space = {}
    space["n_estimators"] = hp.randint(5, 10)
    space["max_depth"] = hp.randint(2, 5)
    space["lr"] = hp.loguniform(1e-4, 1e-1)
    return space
def test_fit_tcn_feature(self):
    """
    Fit an AutoTSTrainer with a TCN model on scaled data and check that the
    returned TSPipeline agrees with the raw best model, survives a
    save / load round trip, and can be fitted again afterwards.
    """
    input_feature_dim = 11  # This param will not be used
    output_feature_dim = 2  # 2 targets are generated in get_tsdataset

    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    # The scaler is fitted on the training set and reused for validation.
    tsdata_train = get_tsdataset().gen_dt_feature().scale(scaler, fit=True)
    tsdata_valid = get_tsdataset().gen_dt_feature().scale(scaler, fit=False)

    search_space = {
        'hidden_units': hp.grid_search([32, 64]),
        'levels': hp.randint(4, 6),
        'kernel_size': hp.randint(3, 5),
        'dropout': hp.uniform(0.1, 0.2),
        'lr': hp.loguniform(0.001, 0.01)
    }
    auto_trainer = AutoTSTrainer(
        model='tcn',
        search_space=search_space,
        past_seq_len=hp.randint(4, 6),
        future_seq_len=1,
        input_feature_num=input_feature_dim,
        output_target_num=output_feature_dim,
        selected_features="auto",
        metric="mse",
        optimizer="Adam",
        loss=torch.nn.MSELoss(),
        logs_dir="/tmp/auto_trainer",
        cpus_per_trial=2,
        name="auto_trainer",
    )
    ts_pipeline = auto_trainer.fit(
        data=tsdata_train,
        epochs=1,
        batch_size=hp.choice([32, 64]),
        validation_data=tsdata_valid,
        n_sampling=1,
    )

    best_config = auto_trainer.get_best_config()
    best_model = auto_trainer.get_best_model()
    assert 4 <= best_config["past_seq_len"] <= 6
    assert isinstance(ts_pipeline, TSPipeline)

    # use raw base model to predict and evaluate
    tsdata_valid.roll(lookback=best_config["past_seq_len"],
                      horizon=0,
                      feature_col=best_config["selected_features"])
    x_valid, y_valid = tsdata_valid.to_numpy()
    y_pred_raw = tsdata_valid.unscale_numpy(best_model.predict(x_valid))

    # use tspipeline to predict and evaluate
    eval_result = ts_pipeline.evaluate(tsdata_valid)
    y_pred = ts_pipeline.predict(tsdata_valid)

    # check if they are the same
    np.testing.assert_almost_equal(y_pred, y_pred_raw)

    # save and load
    pipeline_dir = "/tmp/auto_trainer/autots_tmp_model_tcn"
    ts_pipeline.save(pipeline_dir)
    new_ts_pipeline = TSPipeline.load(pipeline_dir)

    # check if load ppl is the same as previous
    eval_result_new = new_ts_pipeline.evaluate(tsdata_valid)
    y_pred_new = new_ts_pipeline.predict(tsdata_valid)
    np.testing.assert_almost_equal(eval_result[0], eval_result_new[0])
    np.testing.assert_almost_equal(y_pred, y_pred_new)

    # use tspipeline to incrementally train
    new_ts_pipeline.fit(tsdata_valid)
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # import zoo.orca.automl.hp as hp AUTO_MODEL_SUPPORT_LIST = ["lstm", "tcn", "seq2seq"] AUTO_MODEL_DEFAULT_SEARCH_SPACE = { "lstm": {"minimal": {"hidden_dim": hp.grid_search([16, 32]), "layer_num": hp.randint(1, 2), "lr": hp.loguniform(0.001, 0.005), "dropout": hp.uniform(0.1, 0.2)}, "normal": {"hidden_dim": hp.grid_search([16, 32, 64]), "layer_num": hp.grid_search([1, 2]), "lr": hp.loguniform(0.0005, 0.01), "dropout": hp.uniform(0, 0.2)}, "large": {"hidden_dim": hp.grid_search([16, 32, 64, 128]), "layer_num": hp.grid_search([1, 2, 3, 4]), "lr": hp.loguniform(0.0005, 0.01), "dropout": hp.uniform(0, 0.3)}}, "tcn": {"minimal": {"hidden_units": hp.grid_search([16, 32]), "levels": hp.randint(4, 6), "kernel_size": 3, "lr": hp.loguniform(0.001, 0.005), "dropout": hp.uniform(0.1, 0.2)},