def test_create_not_torch_model(self):
    def model_creator(config):
        return torch.Tensor(3, 5)

    modelBuilder = PytorchModelBuilder(model_creator=model_creator,
                                       optimizer_creator=optimizer_creator,
                                       loss_creator=loss_creator)
    with pytest.raises(ValueError):
        model = modelBuilder.build(config={
            "lr": 1e-2,
            "batch_size": 32,
        })
def test_fit_evaluate(self):
    metric_name = "rmse"
    modelBuilder = PytorchModelBuilder(model_creator=model_creator_pytorch,
                                       optimizer_creator=optimizer_creator,
                                       loss_creator=loss_creator)
    model = modelBuilder.build(config={
        "lr": 1e-2,
        "batch_size": 32,
    })
    val_result = model.fit_eval(data=(self.data["x"], self.data["y"]),
                                validation_data=(self.data["val_x"], self.data["val_y"]),
                                metric=metric_name,
                                epochs=20)
    assert val_result.get(metric_name)
def prepare_searcher(data,
                     search_space,
                     stop,
                     validation_data=None,
                     model_creator=linear_model_creator,
                     optimizer_creator=optimizer_creator,
                     loss_creator=loss_creator,
                     metric="mse",
                     metric_mode="min",
                     name="demo"):
    modelBuilder = PytorchModelBuilder(model_creator=model_creator,
                                       optimizer_creator=optimizer_creator,
                                       loss_creator=loss_creator)
    searcher = SearchEngineFactory.create_engine(backend="ray",
                                                 logs_dir="~/zoo_automl_logs",
                                                 resources_per_trial={"cpu": 2},
                                                 name=name)
    searcher.compile(data=data,
                     validation_data=validation_data,
                     model_builder=modelBuilder,
                     search_space=search_space,
                     n_sampling=2,
                     epochs=stop["training_iteration"],
                     metric_threshold=stop["reward_metric"],
                     metric_mode=metric_mode,
                     metric=metric)
    return searcher
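# A hedged sketch of how prepare_searcher is typically driven: build the searcher,
# run the trials, then read back the best result. The search space, stop criteria
# and data below are illustrative assumptions, and get_best_trials is assumed to be
# the SearchEngine accessor for finished trials.
import numpy as np
from zoo.orca.automl import hp

x = np.random.randn(100, 10).astype(np.float32)
y = np.random.randn(100, 1).astype(np.float32)
val_x = np.random.randn(20, 10).astype(np.float32)
val_y = np.random.randn(20, 1).astype(np.float32)

searcher = prepare_searcher(data=(x, y),
                            validation_data=(val_x, val_y),
                            search_space={"lr": hp.choice([0.001, 0.01]),
                                          "batch_size": hp.choice([32, 64])},
                            stop={"training_iteration": 2, "reward_metric": -0.5})
searcher.run()
best_trials = searcher.get_best_trials(k=1)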
def test_dataloader_fit_evaluate(self):
    metric_name = "rmse"
    modelBuilder = PytorchModelBuilder(model_creator=model_creator_pytorch,
                                       optimizer_creator=optimizer_creator,
                                       loss_creator=loss_creator)
    model = modelBuilder.build(config={
        "lr": 1e-2,
        "batch_size": 32,
        "train_size": 500,
        "valid_size": 100,
        "shuffle": True
    })
    val_result = model.fit_eval(data=train_dataloader_creator,
                                validation_data=valid_dataloader_creator,
                                metric=metric_name,
                                epochs=20)
    assert model.config["train_size"] == 500
    assert model.config["valid_size"] == 100
    assert model.config["shuffle"] is True
    assert val_result.get(metric_name)
def test_evaluate(self):
    modelBuilder = PytorchModelBuilder(model_creator=model_creator_pytorch,
                                       optimizer_creator=optimizer_creator,
                                       loss_creator=loss_creator)
    model = modelBuilder.build(config={
        "lr": 1e-2,
        "batch_size": 32,
    })
    model.fit_eval(data=(self.data["x"], self.data["y"]),
                   validation_data=(self.data["val_x"], self.data["val_y"]),
                   metric="rmse",
                   epochs=20)
    mse_eval = model.evaluate(x=self.data["val_x"], y=self.data["val_y"])
    try:
        import onnx
        import onnxruntime
        mse_eval_onnx = model.evaluate_with_onnx(x=self.data["val_x"],
                                                 y=self.data["val_y"])
        np.testing.assert_almost_equal(mse_eval, mse_eval_onnx)
    except ImportError:
        pass
    # incremental training test
    model.fit_eval(data=(self.data["x"], self.data["y"]),
                   validation_data=(self.data["val_x"], self.data["val_y"]),
                   metric="rmse",
                   epochs=20)
    mse_eval = model.evaluate(x=self.data["val_x"], y=self.data["val_y"])
    try:
        import onnx
        import onnxruntime
        mse_eval_onnx = model.evaluate_with_onnx(x=self.data["val_x"],
                                                 y=self.data["val_y"])
        np.testing.assert_almost_equal(mse_eval, mse_eval_onnx)
    except ImportError:
        pass
def test_predict(self):
    modelBuilder = PytorchModelBuilder(model_creator=model_creator_pytorch,
                                       optimizer_creator=optimizer_creator,
                                       loss_creator=loss_creator)
    model = modelBuilder.build(config={
        "lr": 1e-2,
        "batch_size": 32,
    })
    model.fit_eval(data=(self.data["x"], self.data["y"]),
                   validation_data=(self.data["val_x"], self.data["val_y"]),
                   metric="rmse",
                   epochs=20)
    pred = model.predict(x=self.data["val_x"])
    pred_full_batch = model.predict(x=self.data["val_x"],
                                    batch_size=len(self.data["val_x"]))
    np.testing.assert_almost_equal(pred, pred_full_batch)
    try:
        import onnx
        import onnxruntime
        pred_onnx = model.predict_with_onnx(x=self.data["val_x"])
        np.testing.assert_almost_equal(pred, pred_onnx)
    except ImportError:
        pass
def from_torch(*,
               model_creator,
               optimizer,
               loss,
               logs_dir="/tmp/auto_estimator_logs",
               resources_per_trial=None,
               name="auto_pytorch_estimator",
               remote_dir=None,
               ):
    """
    Create an AutoEstimator for torch.

    :param model_creator: PyTorch model creator function.
    :param optimizer: PyTorch optimizer creator function or PyTorch optimizer name (string).
        Note that if you pass an optimizer name, you should specify a learning rate search
        space with the key "lr" or LR_NAME
        (from zoo.orca.automl.pytorch_utils import LR_NAME).
        Without a learning rate search space specified, the default learning rate of 1e-3
        will be used for all estimators.
    :param loss: PyTorch loss instance, PyTorch loss creator function
        or PyTorch loss name (string).
    :param logs_dir: Local directory to save logs and results. It defaults to
        "/tmp/auto_estimator_logs".
    :param resources_per_trial: Dict. Resources for each trial, e.g. {"cpu": 2}.
    :param name: Name of the auto estimator. It defaults to "auto_pytorch_estimator".
    :param remote_dir: String. Remote directory to sync training results and checkpoints.
        It defaults to None and doesn't take effect while running locally. While running
        in a cluster, it defaults to "hdfs:///tmp/{name}".

    :return: an AutoEstimator object.
    """
    from zoo.orca.automl.model.base_pytorch_model import PytorchModelBuilder
    model_builder = PytorchModelBuilder(model_creator=model_creator,
                                        optimizer_creator=optimizer,
                                        loss_creator=loss)
    return AutoEstimator(model_builder=model_builder,
                         logs_dir=logs_dir,
                         resources_per_trial=resources_per_trial,
                         remote_dir=remote_dir,
                         name=name)
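# A minimal usage sketch for AutoEstimator.from_torch, assuming an Orca/Ray context is
# already initialized and that AutoEstimator is importable from
# zoo.orca.automl.auto_estimator. The model creator, search space, fit arguments and
# get_best_model call below are illustrative assumptions, not prescriptive.
import numpy as np
import torch.nn as nn
from zoo.orca.automl import hp
from zoo.orca.automl.auto_estimator import AutoEstimator

def linear_model_creator(config):
    # a tiny regression model; "input_dim" is a hypothetical config key
    return nn.Linear(config["input_dim"], 1)

x = np.random.randn(100, 10).astype(np.float32)
y = np.random.randn(100, 1).astype(np.float32)
val_x = np.random.randn(20, 10).astype(np.float32)
val_y = np.random.randn(20, 1).astype(np.float32)

auto_est = AutoEstimator.from_torch(model_creator=linear_model_creator,
                                    optimizer="Adam",
                                    loss=nn.MSELoss(),
                                    resources_per_trial={"cpu": 2},
                                    name="demo_auto_estimator")
auto_est.fit(data=(x, y),
             validation_data=(val_x, val_y),
             search_space={"lr": hp.choice([0.001, 0.01]),
                           "batch_size": hp.choice([32, 64]),
                           "input_dim": 10},
             n_sampling=2,
             epochs=2,
             metric="mse")
best_model = auto_est.get_best_model()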
def __init__(self,
             input_feature_num,
             output_target_num,
             past_seq_len,
             future_seq_len,
             optimizer,
             loss,
             metric,
             hidden_units=None,
             levels=None,
             num_channels=None,
             kernel_size=7,
             lr=0.001,
             dropout=0.2,
             backend="torch",
             logs_dir="/tmp/auto_tcn",
             cpus_per_trial=1,
             name="auto_tcn",
             remote_dir=None,
             ):
    """
    Create an AutoTCN.

    :param input_feature_num: Int. The number of features in the input.
    :param output_target_num: Int. The number of targets in the output.
    :param past_seq_len: Int. The number of historical steps used for forecasting.
    :param future_seq_len: Int. The number of future steps to forecast.
    :param optimizer: String, PyTorch optimizer creator function or
        tf.keras optimizer instance.
    :param loss: String, pytorch/tf.keras loss instance or pytorch loss creator function.
    :param metric: String. The evaluation metric name to optimize, e.g. "mse".
    :param hidden_units: Int or hp sampling function from an integer space. The number of
        hidden units or filters for each convolutional layer. It is similar to `units` for
        LSTM. It defaults to 30. The hidden_units value is ignored if num_channels is
        specified. For hp sampling, see zoo.orca.automl.hp for more details,
        e.g. hp.grid_search([32, 64]).
    :param levels: Int or hp sampling function from an integer space. The number of levels
        of TemporalBlocks to use. It defaults to 8. The levels value is ignored if
        num_channels is specified.
    :param num_channels: List of integers. A list of hidden_units for each level. You could
        specify num_channels if you want different hidden_units for different levels.
        By default, num_channels equals
        [hidden_units] * (levels - 1) + [output_target_num].
    :param kernel_size: Int or hp sampling function from an integer space.
        The size of the kernel to use in each convolutional layer.
    :param lr: float or hp sampling function from a float space. Learning rate,
        e.g. hp.choice([0.001, 0.003, 0.01]).
    :param dropout: float or hp sampling function from a float space. Dropout rate,
        e.g. hp.uniform(0.1, 0.3).
    :param backend: The backend of the TCN model. We only support backend as "torch" for now.
    :param logs_dir: Local directory to save logs and results. It defaults to "/tmp/auto_tcn".
    :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
    :param name: name of the AutoTCN. It defaults to "auto_tcn".
    :param remote_dir: String. Remote directory to sync training results and checkpoints.
        It defaults to None and doesn't take effect while running locally. While running
        in a cluster, it defaults to "hdfs:///tmp/{name}".
    """
    super().__init__()
    # todo: support search for past_seq_len.
    # todo: add input check.
    if backend != "torch":
        raise ValueError(f"We only support backend as torch. Got {backend}")
    self.search_space = dict(
        input_feature_num=input_feature_num,
        output_feature_num=output_target_num,
        past_seq_len=past_seq_len,
        future_seq_len=future_seq_len,
        nhid=hidden_units,
        levels=levels,
        num_channels=num_channels,
        kernel_size=kernel_size,
        lr=lr,
        dropout=dropout,
    )
    self.metric = metric
    model_builder = PytorchModelBuilder(
        model_creator=model_creator,
        optimizer_creator=optimizer,
        loss_creator=loss,
    )
    self.auto_est = AutoEstimator(
        model_builder=model_builder,
        logs_dir=logs_dir,
        resources_per_trial={"cpu": cpus_per_trial},
        remote_dir=remote_dir,
        name=name)
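# A hedged construction sketch for AutoTCN. The hp values mirror the docstring examples;
# the fit call, the data layout and get_best_model are assumptions for illustration and
# may differ from the actual AutoTCN API.
import torch.nn as nn
from zoo.orca.automl import hp

auto_tcn = AutoTCN(input_feature_num=2,
                   output_target_num=1,
                   past_seq_len=20,
                   future_seq_len=1,
                   optimizer="Adam",
                   loss=nn.MSELoss(),
                   metric="mse",
                   hidden_units=hp.grid_search([32, 64]),
                   levels=hp.randint(4, 8),
                   kernel_size=hp.choice([3, 7]),
                   lr=hp.choice([0.001, 0.003, 0.01]),
                   dropout=hp.uniform(0.1, 0.3),
                   cpus_per_trial=2)
auto_tcn.fit(data=(train_x, train_y),       # hypothetical numpy arrays shaped for the TCN
             validation_data=(val_x, val_y),
             epochs=5)
best_model = auto_tcn.get_best_model()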
def __init__(self,
             input_feature_num,
             output_target_num,
             past_seq_len,
             future_seq_len,
             optimizer,
             loss,
             metric,
             lr=0.001,
             lstm_hidden_dim=128,
             lstm_layer_num=2,
             dropout=0.25,
             teacher_forcing=False,
             backend="torch",
             logs_dir="/tmp/auto_seq2seq",
             cpus_per_trial=1,
             name="auto_seq2seq",
             remote_dir=None,
             ):
    """
    Create an AutoSeq2Seq.

    :param input_feature_num: Int. The number of features in the input.
    :param output_target_num: Int. The number of targets in the output.
    :param past_seq_len: Int. The number of historical steps used for forecasting.
    :param future_seq_len: Int. The number of future steps to forecast.
    :param optimizer: String, PyTorch optimizer creator function or
        tf.keras optimizer instance.
    :param loss: String, pytorch/tf.keras loss instance or pytorch loss creator function.
    :param metric: String. The evaluation metric name to optimize, e.g. "mse".
    :param lr: float or hp sampling function from a float space. Learning rate,
        e.g. hp.choice([0.001, 0.003, 0.01]).
    :param lstm_hidden_dim: LSTM hidden channel for decoder and encoder,
        e.g. hp.grid_search([32, 64, 128]).
    :param lstm_layer_num: LSTM layer number for decoder and encoder,
        e.g. hp.randint(1, 4).
    :param dropout: float or hp sampling function from a float space. Dropout rate,
        e.g. hp.uniform(0.1, 0.3).
    :param teacher_forcing: Whether to use teacher forcing in training,
        e.g. hp.choice([True, False]).
    :param backend: The backend of the Seq2Seq model. We only support backend as "torch"
        for now.
    :param logs_dir: Local directory to save logs and results. It defaults to
        "/tmp/auto_seq2seq".
    :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
    :param name: name of the AutoSeq2Seq. It defaults to "auto_seq2seq".
    :param remote_dir: String. Remote directory to sync training results and checkpoints.
        It defaults to None and doesn't take effect while running locally. While running
        in a cluster, it defaults to "hdfs:///tmp/{name}".
    """
    super().__init__()
    # todo: support search for past_seq_len.
    # todo: add input check.
    if backend != "torch":
        raise ValueError(f"We only support backend as torch. Got {backend}")
    self.search_space = dict(input_feature_num=input_feature_num,
                             output_feature_num=output_target_num,
                             past_seq_len=past_seq_len,
                             future_seq_len=future_seq_len,
                             lstm_hidden_dim=lstm_hidden_dim,
                             lstm_layer_num=lstm_layer_num,
                             lr=lr,
                             dropout=dropout,
                             teacher_forcing=teacher_forcing)
    self.metric = metric
    model_builder = PytorchModelBuilder(
        model_creator=model_creator,
        optimizer_creator=optimizer,
        loss_creator=loss,
    )
    self.auto_est = AutoEstimator(
        model_builder=model_builder,
        logs_dir=logs_dir,
        resources_per_trial={"cpu": cpus_per_trial},
        remote_dir=remote_dir,
        name=name)
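# A hedged construction sketch for AutoSeq2Seq, highlighting the Seq2Seq-specific search
# dimensions (lstm_hidden_dim, lstm_layer_num, teacher_forcing). Values are illustrative
# only, taken from the docstring examples.
import torch.nn as nn
from zoo.orca.automl import hp

auto_s2s = AutoSeq2Seq(input_feature_num=2,
                       output_target_num=1,
                       past_seq_len=20,
                       future_seq_len=5,
                       optimizer="Adam",
                       loss=nn.MSELoss(),
                       metric="mse",
                       lr=hp.choice([0.001, 0.003, 0.01]),
                       lstm_hidden_dim=hp.grid_search([32, 64, 128]),
                       lstm_layer_num=hp.randint(1, 4),
                       dropout=hp.uniform(0.1, 0.3),
                       teacher_forcing=hp.choice([True, False]))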
def __init__(self,
             input_feature_num,
             output_target_num,
             past_seq_len,
             optimizer,
             loss,
             metric,
             hidden_dim=32,
             layer_num=1,
             lr=0.001,
             dropout=0.2,
             backend="torch",
             logs_dir="/tmp/auto_lstm",
             cpus_per_trial=1,
             name="auto_lstm",
             remote_dir=None,
             ):
    """
    Create an AutoLSTM.

    :param input_feature_num: Int. The number of features in the input.
    :param output_target_num: Int. The number of targets in the output.
    :param past_seq_len: Int or hp sampling function from an integer space. The number of
        historical steps used for forecasting.
    :param optimizer: String, PyTorch optimizer creator function or
        tf.keras optimizer instance.
    :param loss: String, pytorch/tf.keras loss instance or pytorch loss creator function.
    :param metric: String. The evaluation metric name to optimize, e.g. "mse".
    :param hidden_dim: Int or hp sampling function from an integer space. The number of
        features in the hidden state `h`. For hp sampling, see zoo.chronos.orca.automl.hp
        for more details, e.g. hp.grid_search([32, 64]).
    :param layer_num: Int or hp sampling function from an integer space. Number of
        recurrent layers, e.g. hp.randint(1, 3).
    :param lr: float or hp sampling function from a float space. Learning rate,
        e.g. hp.choice([0.001, 0.003, 0.01]).
    :param dropout: float or hp sampling function from a float space. Dropout rate,
        e.g. hp.uniform(0.1, 0.3).
    :param backend: The backend of the lstm model. We only support backend as "torch"
        for now.
    :param logs_dir: Local directory to save logs and results. It defaults to
        "/tmp/auto_lstm".
    :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
    :param name: name of the AutoLSTM. It defaults to "auto_lstm".
    :param remote_dir: String. Remote directory to sync training results and checkpoints.
        It defaults to None and doesn't take effect while running locally. While running
        in a cluster, it defaults to "hdfs:///tmp/{name}".
    """
    super().__init__()
    # todo: support backend = 'keras'
    if backend != "torch":
        raise ValueError(f"We only support backend as torch. Got {backend}")
    self.search_space = dict(hidden_dim=hidden_dim,
                             layer_num=layer_num,
                             lr=lr,
                             dropout=dropout,
                             input_feature_num=input_feature_num,
                             output_feature_num=output_target_num,
                             past_seq_len=past_seq_len,
                             future_seq_len=1)
    self.metric = metric
    model_builder = PytorchModelBuilder(
        model_creator=model_creator,
        optimizer_creator=optimizer,
        loss_creator=loss,
    )
    self.auto_est = AutoEstimator(
        model_builder=model_builder,
        logs_dir=logs_dir,
        resources_per_trial={"cpu": cpus_per_trial},
        remote_dir=remote_dir,
        name=name)
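# A hedged construction sketch for AutoLSTM. Note that future_seq_len is fixed to 1 in the
# search space, so only past_seq_len is exposed. Argument values are illustrative only,
# taken from the docstring examples.
import torch.nn as nn
from zoo.orca.automl import hp

auto_lstm = AutoLSTM(input_feature_num=2,
                     output_target_num=1,
                     past_seq_len=20,
                     optimizer="Adam",
                     loss=nn.MSELoss(),
                     metric="mse",
                     hidden_dim=hp.grid_search([32, 64]),
                     layer_num=hp.randint(1, 3),
                     lr=hp.choice([0.001, 0.003, 0.01]),
                     dropout=hp.uniform(0.1, 0.3))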