Example #1
    def test_create_not_torch_model(self):
        def model_creator(config):
            return torch.Tensor(3, 5)

        modelBuilder = PytorchModelBuilder(model_creator=model_creator,
                                           optimizer_creator=optimizer_creator,
                                           loss_creator=loss_creator)
        with pytest.raises(ValueError):
            model = modelBuilder.build(config={
                "lr": 1e-2,
                "batch_size": 32,
            })
Example #2
    def test_fit_evaluate(self):
        metric_name = "rmse"
        modelBuilder = PytorchModelBuilder(model_creator=model_creator_pytorch,
                                           optimizer_creator=optimizer_creator,
                                           loss_creator=loss_creator)
        model = modelBuilder.build(config={
            "lr": 1e-2,
            "batch_size": 32,
        })
        val_result = model.fit_eval(data=(self.data["x"], self.data["y"]),
                                    validation_data=(self.data["val_x"],
                                                     self.data["val_y"]),
                                    metric=metric_name,
                                    epochs=20)
        assert val_result.get(metric_name)
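The tests above rely on module-level fixtures (model_creator_pytorch, optimizer_creator, loss_creator) that are not shown on this page. Below is a minimal, hypothetical sketch of what such creators could look like, assuming the usual creator signatures of PytorchModelBuilder: the model creator takes a config dict, the optimizer creator takes the model and the config, and the loss creator takes the config.

import torch
import torch.nn as nn

def model_creator_pytorch(config):
    # Hypothetical stand-in for the test fixture: a simple linear model.
    return nn.Linear(config.get("input_size", 50), config.get("output_size", 1))

def optimizer_creator(model, config):
    # Reads the "lr" value that the tests pass in through the build config.
    return torch.optim.Adam(model.parameters(), lr=config["lr"])

def loss_creator(config):
    # A plain PyTorch loss instance.
    return nn.MSELoss()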
Example #3
def prepare_searcher(data,
                     search_space,
                     stop,
                     validation_data=None,
                     model_creator=linear_model_creator,
                     optimizer_creator=optimizer_creator,
                     loss_creator=loss_creator,
                     metric="mse",
                     metric_mode="min",
                     name="demo"):
    modelBuilder = PytorchModelBuilder(model_creator=model_creator,
                                       optimizer_creator=optimizer_creator,
                                       loss_creator=loss_creator)
    searcher = SearchEngineFactory.create_engine(
        backend="ray",
        logs_dir="~/zoo_automl_logs",
        resources_per_trial={"cpu": 2},
        name=name)
    searcher.compile(data=data,
                     validation_data=validation_data,
                     model_builder=modelBuilder,
                     search_space=search_space,
                     n_sampling=2,
                     epochs=stop["training_iteration"],
                     metric_threshold=stop["reward_metric"],
                     metric_mode=metric_mode,
                     metric=metric)
    return searcher
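A hedged usage sketch of prepare_searcher follows. The shapes of search_space and stop are inferred from how the function reads them (hp sampling functions for tunable values; "training_iteration" and "reward_metric" keys in stop). The toy data format and the hp helpers are assumptions based on the other examples and docstrings on this page.

import numpy as np
from zoo.orca.automl import hp  # hp sampling helpers referenced in the docstrings below

# Hypothetical toy data; the real test data is not shown here.
data = (np.random.randn(100, 50), np.random.randn(100, 1))
val_data = (np.random.randn(20, 50), np.random.randn(20, 1))

search_space = {
    "lr": hp.choice([0.001, 0.003, 0.01]),
    "batch_size": hp.grid_search([32, 64]),
}
# stop supplies the epoch budget and the metric threshold read inside prepare_searcher.
stop = {"training_iteration": 5, "reward_metric": 0.05}

searcher = prepare_searcher(data=data,
                            search_space=search_space,
                            stop=stop,
                            validation_data=val_data)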
Example #4
    def test_dataloader_fit_evaluate(self):
        metric_name = "rmse"
        modelBuilder = PytorchModelBuilder(model_creator=model_creator_pytorch,
                                           optimizer_creator=optimizer_creator,
                                           loss_creator=loss_creator)
        model = modelBuilder.build(
            config={
                "lr": 1e-2,
                "batch_size": 32,
                "train_size": 500,
                "valid_size": 100,
                "shuffle": True
            })
        val_result = model.fit_eval(data=train_dataloader_creator,
                                    validation_data=valid_dataloader_creator,
                                    metric=metric_name,
                                    epochs=20)
        assert model.config["train_size"] == 500
        assert model.config["valid_size"] == 100
        assert model.config["shuffle"] is True
        assert val_result.get(metric_name)
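The dataloader creators referenced above are not shown either. A minimal sketch, assuming each creator receives the build config and returns a torch DataLoader, using the config keys the test asserts on (train_size, valid_size, batch_size, shuffle):

import torch
from torch.utils.data import DataLoader, TensorDataset

def train_dataloader_creator(config):
    # Hypothetical random data sized by the "train_size" key from the build config.
    x = torch.randn(config["train_size"], 50)
    y = torch.randn(config["train_size"], 1)
    return DataLoader(TensorDataset(x, y),
                      batch_size=config["batch_size"],
                      shuffle=config["shuffle"])

def valid_dataloader_creator(config):
    x = torch.randn(config["valid_size"], 50)
    y = torch.randn(config["valid_size"], 1)
    return DataLoader(TensorDataset(x, y),
                      batch_size=config["batch_size"],
                      shuffle=False)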
Example #5
    def test_evaluate(self):
        modelBuilder = PytorchModelBuilder(model_creator=model_creator_pytorch,
                                           optimizer_creator=optimizer_creator,
                                           loss_creator=loss_creator)
        model = modelBuilder.build(config={
            "lr": 1e-2,
            "batch_size": 32,
        })
        model.fit_eval(data=(self.data["x"], self.data["y"]),
                       validation_data=(self.data["val_x"],
                                        self.data["val_y"]),
                       metric="rmse",
                       epochs=20)
        mse_eval = model.evaluate(x=self.data["val_x"], y=self.data["val_y"])
        try:
            import onnx
            import onnxruntime
            mse_eval_onnx = model.evaluate_with_onnx(x=self.data["val_x"],
                                                     y=self.data["val_y"])
            np.testing.assert_almost_equal(mse_eval, mse_eval_onnx)
        except ImportError:
            pass
        # incremental training test
        model.fit_eval(data=(self.data["x"], self.data["y"]),
                       validation_data=(self.data["val_x"],
                                        self.data["val_y"]),
                       metric="rmse",
                       epochs=20)
        mse_eval = model.evaluate(x=self.data["val_x"], y=self.data["val_y"])
        try:
            import onnx
            import onnxruntime
            mse_eval_onnx = model.evaluate_with_onnx(x=self.data["val_x"],
                                                     y=self.data["val_y"])
            np.testing.assert_almost_equal(mse_eval, mse_eval_onnx)
        except ImportError:
            pass
Example #6
    def test_predict(self):
        modelBuilder = PytorchModelBuilder(model_creator=model_creator_pytorch,
                                           optimizer_creator=optimizer_creator,
                                           loss_creator=loss_creator)
        model = modelBuilder.build(config={
            "lr": 1e-2,
            "batch_size": 32,
        })
        model.fit_eval(data=(self.data["x"], self.data["y"]),
                       validation_data=(self.data["val_x"],
                                        self.data["val_y"]),
                       metric="rmse",
                       epochs=20)
        pred = model.predict(x=self.data["val_x"])
        pred_full_batch = model.predict(x=self.data["val_x"],
                                        batch_size=len(self.data["val_x"]))
        np.testing.assert_almost_equal(pred, pred_full_batch)
        try:
            import onnx
            import onnxruntime
            pred_onnx = model.predict_with_onnx(x=self.data["val_x"])
            np.testing.assert_almost_equal(pred, pred_onnx)
        except ImportError:
            pass
Example #7
    def from_torch(
        *,
        model_creator,
        optimizer,
        loss,
        logs_dir="/tmp/auto_estimator_logs",
        resources_per_trial=None,
        name="auto_pytorch_estimator",
        remote_dir=None,
    ):
        """
        Create an AutoEstimator for torch.

        :param model_creator: PyTorch model creator function.
        :param optimizer: PyTorch optimizer creator function or PyTorch optimizer name (string).
            Note that if you pass an optimizer name, you should specify the learning rate search
            space with the key "lr" or LR_NAME (from zoo.orca.automl.pytorch_utils import LR_NAME).
            Without a learning rate search space specified, the default learning rate of 1e-3
            will be used for all estimators.
        :param loss: PyTorch loss instance, PyTorch loss creator function,
            or PyTorch loss name (string).
        :param logs_dir: Local directory to save logs and results. It defaults to
            "/tmp/auto_estimator_logs".
        :param resources_per_trial: Dict. Resources for each trial, e.g. {"cpu": 2}.
        :param name: Name of the auto estimator. It defaults to "auto_pytorch_estimator".
        :param remote_dir: String. Remote directory to sync training results and checkpoints. It
            defaults to None and does not take effect when running locally. When running in a
            cluster, it defaults to "hdfs:///tmp/{name}".

        :return: an AutoEstimator object.
        """
        from zoo.orca.automl.model.base_pytorch_model import PytorchModelBuilder
        model_builder = PytorchModelBuilder(model_creator=model_creator,
                                            optimizer_creator=optimizer,
                                            loss_creator=loss)

        return AutoEstimator(model_builder=model_builder,
                             logs_dir=logs_dir,
                             resources_per_trial=resources_per_trial,
                             remote_dir=remote_dir,
                             name=name)
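A hedged usage sketch of from_torch follows, illustrating the docstring's note that a learning rate search space keyed by "lr" is needed when the optimizer is given as a name. The AutoEstimator import path and the concrete values are assumptions, and the tiny model creator is only a placeholder.

import torch
from zoo.orca.automl import hp
from zoo.orca.automl.auto_estimator import AutoEstimator  # import path is an assumption

def model_creator(config):
    # Placeholder PyTorch model creator; any model creator function works here.
    return torch.nn.Linear(10, 1)

auto_est = AutoEstimator.from_torch(model_creator=model_creator,
                                    optimizer="Adam",
                                    loss=torch.nn.MSELoss(),
                                    logs_dir="/tmp/auto_estimator_logs",
                                    resources_per_trial={"cpu": 2},
                                    name="demo_estimator")

# Because the optimizer is passed by name, the search space carries the "lr" key.
search_space = {
    "lr": hp.choice([0.001, 0.003, 0.01]),
    "batch_size": hp.grid_search([32, 64]),
}

This search_space would then be passed to the estimator's fit/search step, which is not reproduced here.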
Example #8
    def __init__(
        self,
        input_feature_num,
        output_target_num,
        past_seq_len,
        future_seq_len,
        optimizer,
        loss,
        metric,
        hidden_units=None,
        levels=None,
        num_channels=None,
        kernel_size=7,
        lr=0.001,
        dropout=0.2,
        backend="torch",
        logs_dir="/tmp/auto_tcn",
        cpus_per_trial=1,
        name="auto_tcn",
        remote_dir=None,
    ):
        """
        Create an AutoTCN.

        :param input_feature_num: Int. The number of features in the input.
        :param output_target_num: Int. The number of targets in the output.
        :param past_seq_len: Int. The number of historical steps used for forecasting.
        :param future_seq_len: Int. The number of future steps to forecast.
        :param optimizer: String or PyTorch optimizer creator function or
               tf.keras optimizer instance.
        :param loss: String or pytorch/tf.keras loss instance or pytorch loss creator function.
        :param metric: String. The evaluation metric name to optimize. e.g. "mse"
        :param hidden_units: Int or hp sampling function from an integer space. The number of hidden
               units or filters for each convolutional layer. It is similar to `units` for LSTM.
               It defaults to 30. The hidden_units value is ignored if num_channels is specified.
               For hp sampling, see zoo.orca.automl.hp for more details.
               e.g. hp.grid_search([32, 64]).
        :param levels: Int or hp sampling function from an integer space. The number of levels of
               TemporalBlocks to use. It defaults to 8. The levels value is ignored if
               num_channels is specified.
        :param num_channels: List of integers. A list of hidden_units for each level. You could
               specify num_channels if you want different hidden_units for different levels.
               By default, num_channels equals
               [hidden_units] * (levels - 1) + [output_target_num].
        :param kernel_size: Int or hp sampling function from an integer space.
               The size of the kernel to use in each convolutional layer.
        :param lr: float or hp sampling function from a float space. Learning rate.
               e.g. hp.choice([0.001, 0.003, 0.01])
        :param dropout: float or hp sampling function from a float space. Dropout rate.
               e.g. hp.uniform(0.1, 0.3)
        :param backend: The backend of the TCN model. We only support backend as "torch" for now.
        :param logs_dir: Local directory to save logs and results. It defaults to "/tmp/auto_tcn"
        :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
        :param name: Name of the AutoTCN. It defaults to "auto_tcn".
        :param remote_dir: String. Remote directory to sync training results and checkpoints. It
            defaults to None and does not take effect when running locally. When running in a
            cluster, it defaults to "hdfs:///tmp/{name}".
        """
        super().__init__()
        # todo: support search for past_seq_len.
        # todo: add input check.
        if backend != "torch":
            raise ValueError(
                f"We only support backend as torch. Got {backend}")
        self.search_space = dict(
            input_feature_num=input_feature_num,
            output_feature_num=output_target_num,
            past_seq_len=past_seq_len,
            future_seq_len=future_seq_len,
            nhid=hidden_units,
            levels=levels,
            num_channels=num_channels,
            kernel_size=kernel_size,
            lr=lr,
            dropout=dropout,
        )
        self.metric = metric
        model_builder = PytorchModelBuilder(
            model_creator=model_creator,
            optimizer_creator=optimizer,
            loss_creator=loss,
        )
        self.auto_est = AutoEstimator(
            model_builder=model_builder,
            logs_dir=logs_dir,
            resources_per_trial={"cpu": cpus_per_trial},
            remote_dir=remote_dir,
            name=name)
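An illustrative construction of an AutoTCN follows, combining the hp sampling examples from the docstring above. Every value besides the parameter names is a placeholder (the sequence lengths, feature counts, and loss instance), and AutoTCN is assumed to be importable from the Chronos AutoTS model package.

import torch
from zoo.orca.automl import hp

# AutoTCN import is omitted; the class name comes from the docstring above.
auto_tcn = AutoTCN(input_feature_num=4,
                   output_target_num=1,
                   past_seq_len=48,
                   future_seq_len=5,
                   optimizer="Adam",
                   loss=torch.nn.MSELoss(),
                   metric="mse",
                   hidden_units=hp.grid_search([32, 64]),
                   levels=hp.randint(4, 8),
                   lr=hp.choice([0.001, 0.003, 0.01]),
                   dropout=hp.uniform(0.1, 0.3),
                   cpus_per_trial=2)

If num_channels were given instead, hidden_units and levels would be ignored, as noted in the docstring.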
Example #9
    def __init__(
        self,
        input_feature_num,
        output_target_num,
        past_seq_len,
        future_seq_len,
        optimizer,
        loss,
        metric,
        lr=0.001,
        lstm_hidden_dim=128,
        lstm_layer_num=2,
        dropout=0.25,
        teacher_forcing=False,
        backend="torch",
        logs_dir="/tmp/auto_seq2seq",
        cpus_per_trial=1,
        name="auto_seq2seq",
        remote_dir=None,
    ):
        """
        Create an AutoSeq2Seq.

        :param input_feature_num: Int. The number of features in the input.
        :param output_target_num: Int. The number of targets in the output.
        :param past_seq_len: Int. The number of historical steps used for forecasting.
        :param future_seq_len: Int. The number of future steps to forecast.
        :param optimizer: String or PyTorch optimizer creator function or
               tf.keras optimizer instance.
        :param loss: String or pytorch/tf.keras loss instance or pytorch loss creator function.
        :param metric: String. The evaluation metric name to optimize. e.g. "mse"
        :param lr: float or hp sampling function from a float space. Learning rate.
               e.g. hp.choice([0.001, 0.003, 0.01])
        :param lstm_hidden_dim: LSTM hidden dimension for the encoder and decoder.
               e.g. hp.grid_search([32, 64, 128])
        :param lstm_layer_num: Number of LSTM layers for the encoder and decoder.
               e.g. hp.randint(1, 4)
        :param dropout: float or hp sampling function from a float space. Dropout rate.
               e.g. hp.uniform(0.1, 0.3)
        :param teacher_forcing: Whether to use teacher forcing in training.
               e.g. hp.choice([True, False])
        :param backend: The backend of the Seq2Seq model. We only support backend as "torch"
               for now.
        :param logs_dir: Local directory to save logs and results. It defaults to
               "/tmp/auto_seq2seq"
        :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
        :param name: Name of the AutoSeq2Seq. It defaults to "auto_seq2seq".
        :param remote_dir: String. Remote directory to sync training results and checkpoints. It
            defaults to None and does not take effect when running locally. When running in a
            cluster, it defaults to "hdfs:///tmp/{name}".
        """
        super().__init__()
        # todo: support search for past_seq_len.
        # todo: add input check.
        if backend != "torch":
            raise ValueError(
                f"We only support backend as torch. Got {backend}")
        self.search_space = dict(input_feature_num=input_feature_num,
                                 output_feature_num=output_target_num,
                                 past_seq_len=past_seq_len,
                                 future_seq_len=future_seq_len,
                                 lstm_hidden_dim=lstm_hidden_dim,
                                 lstm_layer_num=lstm_layer_num,
                                 lr=lr,
                                 dropout=dropout,
                                 teacher_forcing=teacher_forcing)
        self.metric = metric
        model_builder = PytorchModelBuilder(
            model_creator=model_creator,
            optimizer_creator=optimizer,
            loss_creator=loss,
        )
        self.auto_est = AutoEstimator(
            model_builder=model_builder,
            logs_dir=logs_dir,
            resources_per_trial={"cpu": cpus_per_trial},
            remote_dir=remote_dir,
            name=name)
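The same pattern applies to AutoSeq2Seq; the main additions are the LSTM hyperparameters and teacher_forcing, which can itself be searched over per the docstring. All values below are placeholders and the class is assumed to be in scope.

import torch
from zoo.orca.automl import hp

auto_s2s = AutoSeq2Seq(input_feature_num=4,
                       output_target_num=1,
                       past_seq_len=48,
                       future_seq_len=5,
                       optimizer="Adam",
                       loss=torch.nn.MSELoss(),
                       metric="mse",
                       lstm_hidden_dim=hp.grid_search([32, 64, 128]),
                       lstm_layer_num=hp.randint(1, 4),
                       lr=hp.choice([0.001, 0.003, 0.01]),
                       dropout=hp.uniform(0.1, 0.3),
                       teacher_forcing=hp.choice([True, False]))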
Example #10
    def __init__(
        self,
        input_feature_num,
        output_target_num,
        past_seq_len,
        optimizer,
        loss,
        metric,
        hidden_dim=32,
        layer_num=1,
        lr=0.001,
        dropout=0.2,
        backend="torch",
        logs_dir="/tmp/auto_lstm",
        cpus_per_trial=1,
        name="auto_lstm",
        remote_dir=None,
    ):
        """
        Create an AutoLSTM.

        :param input_feature_num: Int. The number of features in the input.
        :param output_target_num: Int. The number of targets in the output.
        :param past_seq_len: Int or hp sampling function. The number of historical
               steps used for forecasting.
        :param optimizer: String or PyTorch optimizer creator function or
               tf.keras optimizer instance.
        :param loss: String or pytorch/tf.keras loss instance or pytorch loss creator function.
        :param metric: String. The evaluation metric name to optimize. e.g. "mse"
        :param hidden_dim: Int or hp sampling function from an integer space. The number of features
               in the hidden state `h`. For hp sampling, see zoo.chronos.orca.automl.hp for more
               details. e.g. hp.grid_search([32, 64]).
        :param layer_num: Int or hp sampling function from an integer space. Number of recurrent
               layers. e.g. hp.randint(1, 3)
        :param lr: float or hp sampling function from a float space. Learning rate.
               e.g. hp.choice([0.001, 0.003, 0.01])
        :param dropout: float or hp sampling function from a float space. Dropout rate.
               e.g. hp.uniform(0.1, 0.3)
        :param backend: The backend of the lstm model. We only support backend as "torch" for now.
        :param logs_dir: Local directory to save logs and results. It defaults to "/tmp/auto_lstm"
        :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
        :param name: Name of the AutoLSTM. It defaults to "auto_lstm".
        :param remote_dir: String. Remote directory to sync training results and checkpoints. It
            defaults to None and does not take effect when running locally. When running in a
            cluster, it defaults to "hdfs:///tmp/{name}".
        """
        super().__init__()
        # todo: support backend = 'keras'
        if backend != "torch":
            raise ValueError(
                f"We only support backend as torch. Got {backend}")
        self.search_space = dict(hidden_dim=hidden_dim,
                                 layer_num=layer_num,
                                 lr=lr,
                                 dropout=dropout,
                                 input_feature_num=input_feature_num,
                                 output_feature_num=output_target_num,
                                 past_seq_len=past_seq_len,
                                 future_seq_len=1)
        self.metric = metric
        model_builder = PytorchModelBuilder(
            model_creator=model_creator,
            optimizer_creator=optimizer,
            loss_creator=loss,
        )
        self.auto_est = AutoEstimator(
            model_builder=model_builder,
            logs_dir=logs_dir,
            resources_per_trial={"cpu": cpus_per_trial},
            remote_dir=remote_dir,
            name=name)
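AutoLSTM is constructed the same way; note that its past_seq_len accepts an hp sampling function, unlike the fixed sequence lengths of the two models above. A sketch with placeholder values, again assuming the class is in scope:

import torch
from zoo.orca.automl import hp

auto_lstm = AutoLSTM(input_feature_num=4,
                     output_target_num=1,
                     past_seq_len=hp.randint(12, 48),
                     optimizer="Adam",
                     loss=torch.nn.MSELoss(),
                     metric="mse",
                     hidden_dim=hp.grid_search([32, 64]),
                     layer_num=hp.randint(1, 3),
                     lr=hp.choice([0.001, 0.003, 0.01]),
                     dropout=hp.uniform(0.1, 0.3))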