def search_space(self):
    space = {
        "n_estimators": hp.randint(self.n_estimators_range[0],
                                   self.n_estimators_range[1]),
        "max_depth": hp.randint(self.max_depth_range[0],
                                self.max_depth_range[1]),
        "min_child_weight": self.min_child_weight,
        "lr": self.lr
    }
    return space
def get_past_seq_config(look_back):
    """
    Generate past sequence config based on look_back.

    :param look_back: look_back configuration
    :return: search configuration for past sequence
    """
    if isinstance(look_back, tuple) and len(look_back) == 2 and \
            isinstance(look_back[0], int) and isinstance(look_back[1], int):
        if look_back[1] < 2:
            raise ValueError("The max look back value should be at least 2.")
        if look_back[0] < 2:
            print("The input min look back value is smaller than 2. "
                  "We sample from range (2, {}) instead.".format(look_back[1]))
            # clamp the lower bound to 2 so the sampled range matches the message
            look_back = (2, look_back[1])
        past_seq_config = hp.randint(look_back[0], look_back[1] + 1)
    elif isinstance(look_back, int):
        if look_back < 2:
            raise ValueError("look back value should not be smaller than 2. "
                             "Current value is {}.".format(look_back))
        past_seq_config = look_back
    else:
        raise ValueError(
            "look back is {}.\n"
            "look_back should be either a tuple of 2 int values "
            "(min_len, max_len) or a single int.".format(look_back))
    return past_seq_config
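# A minimal usage sketch for get_past_seq_config (illustrative only); note the
# +1 in the tuple branch above makes the randint upper bound inclusive of max_len.
tuned_seq = get_past_seq_config((4, 8))  # search space over look backs 4..8
fixed_seq = get_past_seq_config(10)      # fixed look back of 10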
def test_fit(self):
    data, validation_data = get_data()
    auto_arima = AutoARIMA(metric="mse",
                           p=hp.randint(0, 4),
                           q=hp.randint(0, 4),
                           seasonality_mode=hp.choice([True, False]),
                           P=hp.randint(5, 12),
                           Q=hp.randint(5, 12),
                           m=hp.choice([4, 7]))
    auto_arima.fit(data=data,
                   validation_data=validation_data,
                   epochs=1,
                   n_sampling=1)
    best_model = auto_arima.get_best_model()
def test_num_channels(self):
    auto_tcn = AutoTCN(input_feature_num=input_feature_dim,
                       output_target_num=output_feature_dim,
                       past_seq_len=past_seq_len,
                       future_seq_len=future_seq_len,
                       optimizer='Adam',
                       loss=torch.nn.MSELoss(),
                       metric="mse",
                       hidden_units=4,
                       levels=hp.randint(1, 3),
                       num_channels=[8] * 2,
                       kernel_size=hp.choice([2, 3]),
                       lr=hp.choice([0.001, 0.003, 0.01]),
                       dropout=hp.uniform(0.1, 0.2),
                       logs_dir="/tmp/auto_tcn",
                       cpus_per_trial=2,
                       name="auto_tcn")
    auto_tcn.fit(data=train_dataloader_creator,
                 epochs=1,
                 batch_size=hp.choice([32, 64]),
                 validation_data=valid_dataloader_creator,
                 n_sampling=1)
    assert auto_tcn.get_best_model()
    best_config = auto_tcn.get_best_config()
    # num_channels is fixed, so the best config must keep it unchanged
    assert best_config['num_channels'] == [8] * 2
def test_fit_lstm_data_creator(self):
    input_feature_dim = 4
    output_feature_dim = 2  # 2 targets are generated in get_tsdataset
    search_space = {
        'hidden_dim': hp.grid_search([32, 64]),
        'layer_num': hp.randint(1, 3),
        'lr': hp.choice([0.001, 0.003, 0.01]),
        'dropout': hp.uniform(0.1, 0.2)
    }
    auto_estimator = AutoTSEstimator(model='lstm',
                                     search_space=search_space,
                                     past_seq_len=7,
                                     future_seq_len=1,
                                     input_feature_num=input_feature_dim,
                                     output_target_num=output_feature_dim,
                                     selected_features="auto",
                                     metric="mse",
                                     loss=torch.nn.MSELoss(),
                                     logs_dir="/tmp/auto_trainer",
                                     cpus_per_trial=2,
                                     name="auto_trainer")
    auto_estimator.fit(data=get_data_creator(),
                       epochs=1,
                       batch_size=hp.choice([32, 64]),
                       validation_data=get_data_creator(),
                       n_sampling=1)
    config = auto_estimator.get_best_config()
    assert config["past_seq_len"] == 7
def test_fit_third_party_feature(self):
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    tsdata_train = get_tsdataset().gen_dt_feature().scale(scaler, fit=True)
    tsdata_valid = get_tsdataset().gen_dt_feature().scale(scaler, fit=False)

    search_space = {
        'hidden_dim': hp.grid_search([32, 64]),
        'dropout': hp.uniform(0.1, 0.2)
    }
    auto_estimator = AutoTSEstimator(model=model_creator,
                                     search_space=search_space,
                                     past_seq_len=hp.randint(4, 6),
                                     future_seq_len=1,
                                     selected_features="auto",
                                     metric="mse",
                                     loss=torch.nn.MSELoss(),
                                     cpus_per_trial=2)
    ts_pipeline = auto_estimator.fit(data=tsdata_train,
                                     epochs=1,
                                     batch_size=hp.choice([32, 64]),
                                     validation_data=tsdata_valid,
                                     n_sampling=1)
    best_config = auto_estimator.get_best_config()
    best_model = auto_estimator._get_best_automl_model()
    assert 4 <= best_config["past_seq_len"] <= 6
    assert isinstance(ts_pipeline, TSPipeline)

    # use the raw base model to predict and evaluate
    tsdata_valid.roll(lookback=best_config["past_seq_len"],
                      horizon=0,
                      feature_col=best_config["selected_features"])
    x_valid, y_valid = tsdata_valid.to_numpy()
    y_pred_raw = best_model.predict(x_valid)
    y_pred_raw = tsdata_valid.unscale_numpy(y_pred_raw)

    # use the ts pipeline to predict and evaluate
    eval_result = ts_pipeline.evaluate(tsdata_valid)
    y_pred = ts_pipeline.predict(tsdata_valid)

    # check that both give the same predictions
    np.testing.assert_almost_equal(y_pred, y_pred_raw)

    # save and load
    ts_pipeline.save("/tmp/auto_trainer/autots_tmp_model_3rdparty")
    new_ts_pipeline = TSPipeline.load(
        "/tmp/auto_trainer/autots_tmp_model_3rdparty")

    # check that the loaded pipeline behaves the same as the saved one
    eval_result_new = new_ts_pipeline.evaluate(tsdata_valid)
    y_pred_new = new_ts_pipeline.predict(tsdata_valid)
    np.testing.assert_almost_equal(eval_result[0], eval_result_new[0])
    np.testing.assert_almost_equal(y_pred, y_pred_new)

    # use the ts pipeline to incrementally train
    new_ts_pipeline.fit(tsdata_valid)
def get_auto_estimator():
    auto_lstm = AutoLSTM(input_feature_num=input_feature_dim,
                         output_target_num=output_feature_dim,
                         past_seq_len=5,
                         optimizer='Adam',
                         loss=torch.nn.MSELoss(),
                         metric="mse",
                         hidden_dim=hp.grid_search([32, 64]),
                         layer_num=hp.randint(1, 3),
                         lr=hp.choice([0.001, 0.003, 0.01]),
                         dropout=hp.uniform(0.1, 0.2),
                         logs_dir="/tmp/auto_lstm",
                         cpus_per_trial=2,
                         name="auto_lstm")
    return auto_lstm
def _gen_sample_func(self, ranges, param_name):
    if isinstance(ranges, tuple):
        assert len(ranges) == 2, \
            f"length of tuple {param_name} should be 2 but got {len(ranges)} instead."
        assert param_name != "teacher_forcing", \
            f"type of {param_name} can only be a list but got a tuple."
        if param_name in ["lr"]:
            return hp.loguniform(lower=ranges[0], upper=ranges[1])
        if param_name in ["lstm_hidden_dim", "lstm_layer_num", "batch_size"]:
            return hp.randint(lower=ranges[0], upper=ranges[1])
        if param_name in ["dropout"]:
            return hp.uniform(lower=ranges[0], upper=ranges[1])
    if isinstance(ranges, list):
        return hp.grid_search(ranges)
    raise RuntimeError(f"{param_name} should be either a list or a tuple.")
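# A hedged sketch of how _gen_sample_func maps ranges to search spaces; the
# parameter ranges below are illustrative, not taken from a real config:
#   self._gen_sample_func((1e-4, 1e-2), "lr")           # -> hp.loguniform
#   self._gen_sample_func((16, 64), "lstm_hidden_dim")  # -> hp.randint
#   self._gen_sample_func([0.1, 0.2, 0.3], "dropout")   # -> hp.grid_search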
def get_auto_estimator():
    auto_tcn = AutoTCN(input_feature_num=input_feature_dim,
                       output_target_num=output_feature_dim,
                       past_seq_len=past_seq_len,
                       future_seq_len=future_seq_len,
                       optimizer='Adam',
                       loss=torch.nn.MSELoss(),
                       metric="mse",
                       hidden_units=8,
                       levels=hp.randint(1, 3),
                       kernel_size=hp.choice([2, 3]),
                       lr=hp.choice([0.001, 0.003, 0.01]),
                       dropout=hp.uniform(0.1, 0.2),
                       logs_dir="/tmp/auto_tcn",
                       cpus_per_trial=2,
                       name="auto_tcn")
    return auto_tcn
def get_auto_estimator():
    auto_seq2seq = AutoSeq2Seq(input_feature_num=input_feature_dim,
                               output_target_num=output_feature_dim,
                               past_seq_len=past_seq_len,
                               future_seq_len=future_seq_len,
                               optimizer='Adam',
                               loss=torch.nn.MSELoss(),
                               metric="mse",
                               lr=hp.choice([0.001, 0.003, 0.01]),
                               lstm_hidden_dim=hp.grid_search([32, 64, 128]),
                               lstm_layer_num=hp.randint(1, 4),
                               dropout=hp.uniform(0.1, 0.3),
                               teacher_forcing=False,
                               logs_dir="/tmp/auto_seq2seq",
                               cpus_per_trial=2,
                               name="auto_seq2seq")
    return auto_seq2seq
def test_select_feature(self):
    sample_num = np.random.randint(100, 200)
    df = pd.DataFrame({
        "datetime": pd.date_range('1/1/2019', periods=sample_num),
        "value": np.random.randn(sample_num),
        "id": np.array(['00'] * sample_num)
    })
    train_ts, val_ts, _ = TSDataset.from_pandas(df,
                                                target_col=['value'],
                                                dt_col='datetime',
                                                id_col='id',
                                                with_split=True,
                                                val_ratio=0.1)
    search_space = {
        'hidden_dim': hp.grid_search([32, 64]),
        'layer_num': hp.randint(1, 3),
        'lr': hp.choice([0.001, 0.003, 0.01]),
        'dropout': hp.uniform(0.1, 0.2)
    }
    input_feature_dim, output_feature_dim = 1, 1
    auto_estimator = AutoTSEstimator(model='lstm',
                                     search_space=search_space,
                                     past_seq_len=6,
                                     future_seq_len=1,
                                     input_feature_num=input_feature_dim,
                                     output_target_num=output_feature_dim,
                                     selected_features="auto",
                                     metric="mse",
                                     loss=torch.nn.MSELoss(),
                                     cpus_per_trial=2,
                                     name="auto_trainer")
    auto_estimator.fit(data=train_ts,
                       epochs=1,
                       batch_size=hp.choice([32, 64]),
                       validation_data=val_ts,
                       n_sampling=1)
    config = auto_estimator.get_best_config()
    assert config['past_seq_len'] == 6
args = parser.parse_args()
num_nodes = 1 if args.cluster_mode == "local" else args.num_workers
init_orca_context(cluster_mode=args.cluster_mode,
                  cores=args.cores,
                  memory=args.memory,
                  num_nodes=num_nodes,
                  init_ray_on_spark=True)

tsdata_train, tsdata_valid, tsdata_test = get_tsdata()

auto_lstm = AutoLSTM(input_feature_num=1,
                     output_target_num=1,
                     past_seq_len=20,
                     hidden_dim=hp.grid_search([32, 64]),
                     layer_num=hp.randint(1, 3),
                     lr=hp.choice([0.01, 0.03, 0.1]),
                     dropout=hp.uniform(0.1, 0.2),
                     optimizer='Adam',
                     loss=torch.nn.MSELoss(),
                     metric="mse")

# roll each split into (lookback, horizon) numpy arrays;
# validation arrays come from the validation split
x_train, y_train = tsdata_train.roll(lookback=20, horizon=1).to_numpy()
x_val, y_val = tsdata_valid.roll(lookback=20, horizon=1).to_numpy()
x_test, y_test = tsdata_test.roll(lookback=20, horizon=1).to_numpy()

auto_lstm.fit(data=(x_train, y_train),
              epochs=args.epochs,
              validation_data=(x_val, y_val))
yhat = auto_lstm.predict(x_test)
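# A hedged follow-up: score the forecast on the held-out test split and tear
# down the Orca context. The Evaluator import path is an assumption here; any
# mse implementation would do in its place.
from bigdl.orca import stop_orca_context
from bigdl.orca.automl.metrics import Evaluator  # assumed import path

mse = Evaluator.evaluate("mse", y_test, yhat)
print("Evaluate: the mean squared error is", mse)
stop_orca_context()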
def test_fit_seq2seq_feature(self):
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    tsdata_train = get_tsdataset().gen_dt_feature().scale(scaler, fit=True)
    tsdata_valid = get_tsdataset().gen_dt_feature().scale(scaler, fit=False)

    auto_estimator = AutoTSEstimator(model='seq2seq',
                                     search_space="minimal",
                                     past_seq_len=hp.randint(4, 6),
                                     future_seq_len=1,
                                     selected_features="auto",
                                     metric="mse",
                                     optimizer="Adam",
                                     loss=torch.nn.MSELoss(),
                                     logs_dir="/tmp/auto_trainer",
                                     cpus_per_trial=2,
                                     name="auto_trainer")
    ts_pipeline = auto_estimator.fit(data=tsdata_train,
                                     epochs=1,
                                     batch_size=hp.choice([32, 64]),
                                     validation_data=tsdata_valid,
                                     n_sampling=1)
    best_config = auto_estimator.get_best_config()
    best_model = auto_estimator._get_best_automl_model()
    assert 4 <= best_config["past_seq_len"] <= 6
    assert isinstance(ts_pipeline, TSPipeline)

    # use the raw base model to predict and evaluate
    tsdata_valid.roll(lookback=best_config["past_seq_len"],
                      horizon=0,
                      feature_col=best_config["selected_features"])
    x_valid, y_valid = tsdata_valid.to_numpy()
    y_pred_raw = best_model.predict(x_valid)
    y_pred_raw = tsdata_valid.unscale_numpy(y_pred_raw)

    # use the ts pipeline to predict and evaluate
    eval_result = ts_pipeline.evaluate(tsdata_valid)
    y_pred = ts_pipeline.predict(tsdata_valid)

    # check that both give the same predictions
    np.testing.assert_almost_equal(y_pred, y_pred_raw)

    # save and load
    ts_pipeline.save("/tmp/auto_trainer/autots_tmp_model_seq2seq")
    new_ts_pipeline = TSPipeline.load(
        "/tmp/auto_trainer/autots_tmp_model_seq2seq")

    # check that the loaded pipeline behaves the same as the saved one
    eval_result_new = new_ts_pipeline.evaluate(tsdata_valid)
    y_pred_new = new_ts_pipeline.predict(tsdata_valid)
    np.testing.assert_almost_equal(eval_result[0], eval_result_new[0])
    np.testing.assert_almost_equal(y_pred, y_pred_new)

    # check that the loaded pipeline also matches when run with onnx
    try:
        import onnx
        import onnxruntime
        eval_result_new_onnx = new_ts_pipeline.evaluate_with_onnx(tsdata_valid)
        y_pred_new_onnx = new_ts_pipeline.predict_with_onnx(tsdata_valid)
        np.testing.assert_almost_equal(eval_result[0],
                                       eval_result_new_onnx[0],
                                       decimal=5)
        np.testing.assert_almost_equal(y_pred, y_pred_new_onnx, decimal=5)
    except ImportError:
        pass

    # use the ts pipeline to incrementally train
    new_ts_pipeline.fit(tsdata_valid)
                          random_state=2)
config = {'random_state': 2}
recipe = None
num_rand_samples = 1
n_estimators_range = (800, 1000)
max_depth_range = (10, 15)
lr = (1e-4, 1e-1)
min_child_weight = [1, 2, 3]

if opt.mode == 'skopt':
    search_space = {
        "n_estimators": hp.randint(n_estimators_range[0],
                                   n_estimators_range[1]),
        "max_depth": hp.randint(max_depth_range[0], max_depth_range[1]),
        "lr": hp.loguniform(lr[0], lr[1]),
        "min_child_weight": hp.choice(min_child_weight),
    }
    search_alg = "skopt"
    search_alg_params = None
    scheduler = "AsyncHyperBand"
    scheduler_params = dict(
        max_t=50,
        grace_period=1,
        reduction_factor=3,
        brackets=3,
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import bigdl.orca.automl.hp as hp

AUTO_MODEL_SUPPORT_LIST = ["lstm", "tcn", "seq2seq"]

AUTO_MODEL_DEFAULT_SEARCH_SPACE = {
    "lstm": {"minimal": {"hidden_dim": hp.grid_search([16, 32]),
                         "layer_num": hp.randint(1, 2),
                         "lr": hp.loguniform(0.001, 0.005),
                         "dropout": hp.uniform(0.1, 0.2)},
             "normal": {"hidden_dim": hp.grid_search([16, 32, 64]),
                        "layer_num": hp.grid_search([1, 2]),
                        "lr": hp.loguniform(0.0005, 0.01),
                        "dropout": hp.uniform(0, 0.2)},
             "large": {"hidden_dim": hp.grid_search([16, 32, 64, 128]),
                       "layer_num": hp.grid_search([1, 2, 3, 4]),
                       "lr": hp.loguniform(0.0005, 0.01),
                       "dropout": hp.uniform(0, 0.3)}},
    "tcn": {"minimal": {"hidden_units": hp.grid_search([16, 32]),
                        "levels": hp.randint(4, 6),
                        "kernel_size": 3,
                        "lr": hp.loguniform(0.001, 0.005),
def get_xgb_search_space():
    return {
        "n_estimators": hp.randint(5, 10),
        "max_depth": hp.randint(2, 5),
        "lr": hp.loguniform(1e-4, 1e-1),
    }
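# A hedged usage sketch: handing this space to an AutoXGBoost-style estimator.
# AutoXGBRegressor and the exact fit signature are assumptions, so the call is
# left commented out:
#   from bigdl.orca.automl.xgboost import AutoXGBRegressor
#   auto_xgb = AutoXGBRegressor(cpus_per_trial=2, name="auto_xgb")
#   auto_xgb.fit(data=(X_train, y_train),
#                validation_data=(X_val, y_val),
#                search_space=get_xgb_search_space(),
#                n_sampling=4)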