Пример #1
0
    def _hp_search(self,
                   input_df,
                   validation_df,
                   metric,
                   recipe,
                   mc,
                   resources_per_trial,
                   remote_dir):
        """
        Run the hyperparameter search and wrap the best model in a pipeline.

        :param input_df: training data, forwarded to AutoEstimator.fit as `data`.
        :param validation_df: validation data, forwarded to AutoEstimator.fit.
        :param metric: evaluation metric to optimize. It is also stored on `self.metric`.
        :param recipe: recipe handed to `self._detach_recipe` (presumably this fills the
               search settings read from `self` below -- TODO confirm).
        :param mc: not used inside this method.
        :param resources_per_trial: resources of each trial, forwarded to AutoEstimator.
        :param remote_dir: remote directory, forwarded to AutoEstimator.
        :return: a TimeSequencePipeline built around the best model of the search.
        """
        builder = self.get_model_builder()

        self.metric = metric
        self._detach_recipe(recipe)

        # settings of the search engine itself
        engine_kwargs = {
            "logs_dir": self.logs_dir,
            "resources_per_trial": resources_per_trial,
            "name": self.name,
            "remote_dir": remote_dir,
        }
        searcher = AutoEstimator(builder, **engine_kwargs)

        # settings of this particular search run
        run_kwargs = {
            "data": input_df,
            "validation_data": validation_df,
            "search_space": self.search_space,
            "n_sampling": self.num_samples,
            "epochs": self.epochs,
            "metric_threshold": self.metric_threshold,
            "search_alg": self.search_alg,
            "search_alg_params": self.search_alg_params,
            "scheduler": self.scheduler,
            "scheduler_params": self.scheduler_params,
            "metric": metric,
        }
        searcher.fit(**run_kwargs)

        winner = searcher.get_best_model()
        return TimeSequencePipeline(name=self.name, model=winner)
Пример #2
0
    def __init__(self,
                 input_feature_num,
                 output_target_num,
                 optimizer,
                 loss,
                 metric,
                 hidden_dim=32,
                 layer_num=1,
                 lr=0.001,
                 dropout=0.2,
                 backend="torch",
                 logs_dir="/tmp/auto_lstm",
                 cpus_per_trial=1,
                 name="auto_lstm"):
        """
        Create an AutoLSTM.

        :param input_feature_num: Int. The number of features in the input
        :param output_target_num: Int. The number of targets in the output
        :param optimizer: String or pyTorch optimizer creator function or
               tf.keras optimizer instance.
        :param loss: String or pytorch/tf.keras loss instance or pytorch loss creator function.
        :param metric: String. The evaluation metric name to optimize. e.g. "mse"
        :param hidden_dim: Int or hp sampling function from an integer space. The number of
               features in the hidden state `h`. For hp sampling, see
               zoo.chronos.orca.automl.hp for more details. e.g. hp.grid_search([32, 64]).
        :param layer_num: Int or hp sampling function from an integer space. Number of
               recurrent layers. e.g. hp.randint(1, 3)
        :param lr: float or hp sampling function from a float space. Learning rate.
               e.g. hp.choice([0.001, 0.003, 0.01])
        :param dropout: float or hp sampling function from a float space. Dropout rate.
               e.g. hp.uniform(0.1, 0.3)
        :param backend: The backend of the lstm model. Only "torch" is supported for now.
        :param logs_dir: Local directory to save logs and results. It defaults to
               "/tmp/auto_lstm"
        :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
        :param name: name of the AutoLSTM. It defaults to "auto_lstm"
        :raises ValueError: if `backend` is anything other than "torch".
        """
        # todo: support backend = 'keras'
        if backend != "torch":
            raise ValueError(
                f"We only support backend as torch. Got {backend}")

        # the targets are stored under the "output_feature_num" key
        self.search_space = {
            "hidden_dim": hidden_dim,
            "layer_num": layer_num,
            "lr": lr,
            "dropout": dropout,
            "input_feature_num": input_feature_num,
            "output_feature_num": output_target_num,
        }
        self.metric = metric

        builder = PytorchModelBuilder(model_creator=model_creator,
                                      optimizer_creator=optimizer,
                                      loss_creator=loss)
        trial_resources = {"cpu": cpus_per_trial}
        self.auto_est = AutoEstimator(model_builder=builder,
                                      logs_dir=logs_dir,
                                      resources_per_trial=trial_resources,
                                      name=name)
Пример #3
0
    def __init__(self,
                 changepoint_prior_scale=0.05,
                 seasonality_prior_scale=10.0,
                 holidays_prior_scale=10.0,
                 seasonality_mode='additive',
                 changepoint_range=0.8,
                 metric='mse',
                 logs_dir="/tmp/auto_prophet_logs",
                 cpus_per_trial=1,
                 name="auto_prophet",
                 **prophet_config):
        """
        Create an automated Prophet Model.
        User need to specify either the exact value or the search space of the
        Prophet model hyperparameters. For details of the Prophet model hyperparameters, refer to
        https://facebook.github.io/prophet/docs/diagnostics.html#hyperparameter-tuning.

        :param changepoint_prior_scale: Float or hp sampling function from a float space
            for hyperparameter changepoint_prior_scale for the Prophet model.
            For hp sampling, see zoo.chronos.orca.automl.hp for more details.
            e.g. hp.loguniform(0.001, 0.5).
        :param seasonality_prior_scale: hyperparameter seasonality_prior_scale for the
            Prophet model.
            e.g. hp.loguniform(0.01, 10).
        :param holidays_prior_scale: hyperparameter holidays_prior_scale for the
            Prophet model.
            e.g. hp.loguniform(0.01, 10).
        :param seasonality_mode: hyperparameter seasonality_mode for the
            Prophet model.
            e.g. hp.choice(['additive', 'multiplicative']).
        :param changepoint_range: hyperparameter changepoint_range for the
            Prophet model.
            e.g. hp.uniform(0.8, 0.95).
        :param metric: String. The evaluation metric name to optimize. e.g. "mse"
        :param logs_dir: Local directory to save logs and results. It defaults to
            "/tmp/auto_prophet_logs"
        :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
        :param name: name of the AutoProphet. It defaults to "auto_prophet"
        :param prophet_config: Other Prophet hyperparameters. They are merged into the
            search space and override the named hyperparameters on key collision.
        """
        self.search_space = {
            "changepoint_prior_scale": changepoint_prior_scale,
            "seasonality_prior_scale": seasonality_prior_scale,
            "holidays_prior_scale": holidays_prior_scale,
            # honour the caller's value (or search space) instead of a fixed 'additive'
            "seasonality_mode": seasonality_mode,
            "changepoint_range": changepoint_range,
        }
        # the extra hyperparameters are documented above, so they must reach the search
        self.search_space.update(prophet_config)
        self.metric = metric
        model_builder = ProphetBuilder()
        self.auto_est = AutoEstimator(
            model_builder=model_builder,
            logs_dir=logs_dir,
            resources_per_trial={"cpu": cpus_per_trial},
            name=name)
Пример #4
0
    def test_fit_metric_func(self):
        """
        Fit a keras AutoEstimator with a callable metric.

        Without `metric_mode` the fit is expected to raise a ValueError whose message
        mentions "metric_mode"; with `metric_mode="min"` the fit is expected to run.
        """
        auto_est = AutoEstimator.from_keras(model_creator=model_creator,
                                            logs_dir="/tmp/zoo_automl_logs",
                                            resources_per_trial={"cpu": 2},
                                            name="test_fit")

        data, validation_data = get_train_val_data()

        def pyrmsle(y_true, y_pred):
            # Root mean squared logarithmic error.
            # NOTE: predictions below -1 are clipped in place so that log1p stays defined.
            y_pred[y_pred < -1] = -1 + 1e-6
            elements = np.power(np.log1p(y_true) - np.log1p(y_pred), 2)
            return float(np.sqrt(np.sum(elements) / len(y_true)))

        with pytest.raises(ValueError) as excinfo:
            auto_est.fit(data=data,
                         validation_data=validation_data,
                         search_space=create_linear_search_space(),
                         n_sampling=2,
                         epochs=1,
                         metric=pyrmsle)
        # inspect the message of the raised exception, not the repr of the
        # ExceptionInfo wrapper returned by pytest.raises
        assert "metric_mode" in str(excinfo.value)

        auto_est.fit(data=data,
                     validation_data=validation_data,
                     search_space=create_linear_search_space(),
                     n_sampling=2,
                     epochs=1,
                     metric=pyrmsle,
                     metric_mode="min")
Пример #5
0
 def test_fit(self):
     """Fit a keras AutoEstimator with a LinearRecipe and check the best model's config."""
     estimator = AutoEstimator.from_keras(
         model_creator=model_creator,
         logs_dir="/tmp/zoo_automl_logs",
         resources_per_trial={"cpu": 2},
         name="test_fit",
     )
     train_val = get_train_val_data()
     estimator.fit(train_val, recipe=LinearRecipe(), metric="mse")
     winner = estimator.get_best_model()
     assert "hidden_size" in winner.config
Пример #6
0
 def test_fit_multiple_times(self):
     """A second fit on the same keras AutoEstimator is expected to raise RuntimeError."""
     estimator = AutoEstimator.from_keras(
         model_creator=model_creator,
         logs_dir="/tmp/zoo_automl_logs",
         resources_per_trial={"cpu": 2},
         name="test_fit",
     )
     train_val = get_train_val_data()
     # first fit is expected to succeed
     estimator.fit(train_val, recipe=LinearRecipe(), metric="mse")
     # fitting the same estimator again is expected to be rejected
     with pytest.raises(RuntimeError):
         estimator.fit(train_val, recipe=LinearRecipe(), metric="mse")
Пример #7
0
 def test_fit_invalid_loss_name(self):
     """Creating a torch AutoEstimator with an unknown loss name is expected to raise ValueError."""
     invalid_loss_name = "MAELoss"
     with pytest.raises(ValueError) as excinfo:
         # the constructor itself is expected to raise, so the result is not bound
         AutoEstimator.from_torch(model_creator=model_creator,
                                  optimizer="SGD",
                                  loss=invalid_loss_name,
                                  logs_dir="/tmp/zoo_automl_logs",
                                  resources_per_trial={"cpu": 2},
                                  name="test_fit")
     # inspect the message of the raised exception, not the repr of the
     # ExceptionInfo wrapper returned by pytest.raises
     assert "valid torch loss name" in str(excinfo.value)
Пример #8
0
 def test_fit_optimizer_name(self):
     """An optimizer given by name ("SGD") is expected to show up on the best model."""
     estimator = AutoEstimator.from_torch(
         model_creator=model_creator,
         optimizer="SGD",
         loss="BCELoss",
         logs_dir="/tmp/zoo_automl_logs",
         resources_per_trial={"cpu": 2},
         name="test_fit",
     )
     train_val = get_train_val_data()
     estimator.fit(train_val, recipe=LinearRecipe(), metric="accuracy")
     winner = estimator.get_best_model()
     optimizer_cls_name = winner.optimizer.__class__.__name__
     assert optimizer_cls_name == "SGD"
Пример #9
0
 def test_fit_multiple_times(self):
     """A second fit on the same torch AutoEstimator is expected to raise RuntimeError."""
     estimator = AutoEstimator.from_torch(
         model_creator=model_creator,
         optimizer="SGD",
         loss="BCELoss",
         logs_dir="/tmp/zoo_automl_logs",
         resources_per_trial={"cpu": 2},
         name="test_fit",
     )
     train_val = get_train_val_data()
     # first fit is expected to succeed
     estimator.fit(train_val, recipe=LinearRecipe(), metric="accuracy")
     # fitting the same estimator again is expected to be rejected
     with pytest.raises(RuntimeError):
         estimator.fit(train_val, recipe=LinearRecipe(), metric="accuracy")
Пример #10
0
 def test_fit_loss_name(self):
     """A loss given by name ("BCELoss") is expected to become an nn.BCELoss on the best model."""
     estimator = AutoEstimator.from_torch(
         model_creator=model_creator,
         optimizer=get_optimizer,
         loss="BCELoss",
         logs_dir="/tmp/zoo_automl_logs",
         resources_per_trial={"cpu": 2},
         name="test_fit",
     )
     train_part, val_part = get_train_val_data()
     estimator.fit(data=train_part,
                   validation_data=val_part,
                   recipe=LinearRecipe(),
                   metric="accuracy")
     winner = estimator.get_best_model()
     assert isinstance(winner.loss_creator, nn.BCELoss)
Пример #11
0
    def test_fit(self):
        """Fit a keras AutoEstimator over the linear search space and check the best config."""
        estimator = AutoEstimator.from_keras(
            model_creator=model_creator,
            logs_dir="/tmp/zoo_automl_logs",
            resources_per_trial={"cpu": 2},
            name="test_fit",
        )

        train_part, val_part = get_train_val_data()
        fit_kwargs = {
            "data": train_part,
            "validation_data": val_part,
            "search_space": create_linear_search_space(),
            "n_sampling": 4,
            "epochs": 1,
            "metric": "mse",
        }
        estimator.fit(**fit_kwargs)

        winner = estimator.get_best_model()
        assert "hidden_size" in winner.config
    def test_fit_optimizer_name(self):
        """Fit a torch AutoEstimator whose optimizer and loss are given by name."""
        estimator = AutoEstimator.from_torch(
            model_creator=model_creator,
            optimizer="SGD",
            loss="BCELoss",
            logs_dir="/tmp/zoo_automl_logs",
            resources_per_trial={"cpu": 2},
            name="test_fit",
        )

        train_part, val_part = get_train_val_data()
        fit_kwargs = {
            "data": train_part,
            "validation_data": val_part,
            "search_space": create_linear_search_space(),
            "n_sampling": 2,
            "epochs": 1,
            "metric": "accuracy",
        }
        estimator.fit(**fit_kwargs)

        # the search must leave a (truthy) best model behind
        assert estimator.get_best_model()
 def test_fit_data_creator(self):
     """Fit from dataloader creator functions and check the best config covers the search space."""
     estimator = AutoEstimator.from_torch(
         model_creator=model_creator,
         optimizer=get_optimizer,
         loss=nn.BCELoss(),
         logs_dir="/tmp/zoo_automl_logs",
         resources_per_trial={"cpu": 2},
         name="test_fit",
     )
     space = create_linear_search_space()
     # add a grid-searched "shuffle" entry on top of the linear search space
     space["shuffle"] = hp.grid_search([True, False])
     estimator.fit(data=train_dataloader_creator,
                   validation_data=valid_dataloader_creator,
                   search_space=space,
                   n_sampling=2,
                   epochs=1,
                   metric="accuracy")
     assert estimator.get_best_model()
     chosen = estimator.get_best_config()
     # every searched key has to be present in the winning config
     absent = [key for key in space.keys() if key not in chosen.keys()]
     assert not absent
 def test_fit_data_creator(self):
     """Fit from dataloader creator functions and check the best model's optimizer and loss."""
     estimator = AutoEstimator.from_torch(
         model_creator=model_creator,
         optimizer=get_optimizer,
         loss=nn.BCELoss(),
         logs_dir="/tmp/zoo_automl_logs",
         resources_per_trial={"cpu": 2},
         name="test_fit",
     )
     space = create_linear_search_space()
     # add a grid-searched "shuffle" entry on top of the linear search space
     space["shuffle"] = hp.grid_search([True, False])
     estimator.fit(data=train_dataloader_creator,
                   validation_data=valid_dataloader_creator,
                   search_space=space,
                   n_sampling=4,
                   epochs=1,
                   metric="accuracy")
     winner = estimator.get_best_model()
     optimizer_cls_name = winner.optimizer.__class__.__name__
     assert optimizer_cls_name == "SGD"
     assert isinstance(winner.loss_creator, nn.BCELoss)
    def test_fit(self):
        """Fit a torch AutoEstimator and check the optimizer and loss of the best model."""
        estimator = AutoEstimator.from_torch(
            model_creator=model_creator,
            optimizer=get_optimizer,
            loss=nn.BCELoss(),
            logs_dir="/tmp/zoo_automl_logs",
            resources_per_trial={"cpu": 2},
            name="test_fit",
        )

        train_part, val_part = get_train_val_data()
        fit_kwargs = {
            "data": train_part,
            "validation_data": val_part,
            "search_space": create_linear_search_space(),
            "n_sampling": 4,
            "epochs": 1,
            "metric": "accuracy",
        }
        estimator.fit(**fit_kwargs)

        winner = estimator.get_best_model()
        optimizer_cls_name = winner.optimizer.__class__.__name__
        assert optimizer_cls_name == "SGD"
        assert isinstance(winner.loss_creator, nn.BCELoss)
Пример #16
0
    def test_fit_multiple_times(self):
        """A second fit on the same keras AutoEstimator is expected to raise RuntimeError."""
        estimator = AutoEstimator.from_keras(
            model_creator=model_creator,
            logs_dir="/tmp/zoo_automl_logs",
            resources_per_trial={"cpu": 2},
            name="test_fit",
        )

        train_part, val_part = get_train_val_data()

        def run_fit():
            # each call builds a fresh search space, exactly like two separate fit calls
            estimator.fit(data=train_part,
                          validation_data=val_part,
                          search_space=create_linear_search_space(),
                          n_sampling=4,
                          epochs=1,
                          metric="mse")

        run_fit()
        with pytest.raises(RuntimeError):
            run_fit()
def train_example(args):
    """
    Search a torch model with a LinearRecipe and print the best model's accuracy.

    :param args: object providing `cpus_per_trial`, `epochs` and `trials`.
    """
    trial_resources = {"cpu": args.cpus_per_trial}
    estimator = AutoEstimator.from_torch(model_creator=model_creator,
                                         optimizer="Adam",
                                         loss="BCELoss",
                                         logs_dir="/tmp/zoo_automl_logs",
                                         resources_per_trial=trial_resources,
                                         name="test_fit")
    train_data, val_data = get_train_val_data()
    search_recipe = LinearRecipe(training_iteration=args.epochs,
                                 num_samples=args.trials)
    estimator.fit(data=train_data,
                  validation_data=val_data,
                  recipe=search_recipe,
                  metric="accuracy")

    # evaluate the winner of the search on the validation split
    winner = estimator.get_best_model()
    val_x, val_y = val_data[0], val_data[1]
    best_model_accuracy = winner.evaluate(x=val_x, y=val_y, metrics=['accuracy'])
    print(f'model accuracy is {best_model_accuracy[0]}')
Пример #18
0
    def test_fit_metric(self):
        """
        Fit a torch AutoEstimator with a callable metric.

        Without `metric_mode` the fit is expected to raise a ValueError whose message
        mentions "metric_mode"; with `metric_mode="max"` the fit is expected to run and
        the best model to be retrievable.
        """
        auto_est = AutoEstimator.from_torch(model_creator=model_creator,
                                            optimizer=get_optimizer,
                                            loss="BCELoss",
                                            logs_dir="/tmp/zoo_automl_logs",
                                            resources_per_trial={"cpu": 2},
                                            name="test_fit")

        data, validation_data = get_train_val_data()

        def f075(y_true, y_pred):
            # F-beta score with beta=0.75
            from sklearn.metrics import fbeta_score
            y_true = np.squeeze(y_true)
            y_pred = np.squeeze(y_pred)
            if np.any(y_pred != y_pred.astype(int)):
                # y_pred is probability
                if y_pred.ndim == 1:
                    y_pred = np.where(y_pred > 0.5, 1, 0)
                else:
                    y_pred = np.argmax(y_pred, axis=1)
            return fbeta_score(y_true, y_pred, beta=0.75)

        with pytest.raises(ValueError) as excinfo:
            auto_est.fit(data=data,
                         validation_data=validation_data,
                         search_space=create_linear_search_space(),
                         n_sampling=2,
                         epochs=1,
                         metric=f075)
        # inspect the message of the raised exception, not the repr of the
        # ExceptionInfo wrapper returned by pytest.raises
        assert "metric_mode" in str(excinfo.value)

        auto_est.fit(data=data,
                     validation_data=validation_data,
                     search_space=create_linear_search_space(),
                     n_sampling=2,
                     epochs=1,
                     metric=f075,
                     metric_mode="max")
        # retrieving the best model must not raise; the value itself is not inspected
        auto_est.get_best_model()
def train_example(args):
    """
    Search a torch model over the linear search space and print its validation accuracy.

    :param args: object providing `cpus_per_trial`, `epochs` and `trials`.
    """
    trial_resources = {"cpu": args.cpus_per_trial}
    estimator = AutoEstimator.from_torch(model_creator=model_creator,
                                         optimizer="Adam",
                                         loss="BCELoss",
                                         logs_dir="/tmp/zoo_automl_logs",
                                         resources_per_trial=trial_resources,
                                         name="test_fit")
    train_data, val_data = get_train_val_data()
    estimator.fit(data=train_data,
                  epochs=args.epochs,
                  validation_data=val_data,
                  metric="accuracy",
                  n_sampling=args.trials,
                  search_space=create_linear_search_space())

    # run the winner of the search on the validation inputs
    winner = estimator.get_best_model()
    val_inputs = torch.from_numpy(val_data[0]).float()
    y_hat = winner(val_inputs).detach().numpy()

    from zoo.orca.automl.metrics import Evaluator
    accuracy = Evaluator.evaluate(metric="accuracy",
                                  y_true=val_data[1],
                                  y_pred=y_hat)
    print("Evaluate: accuracy is", accuracy)
Пример #20
0
    def __init__(self,
                 model="lstm",
                 search_space=None,
                 metric="mse",
                 loss=None,
                 optimizer="Adam",
                 past_seq_len=2,
                 future_seq_len=1,
                 input_feature_num=None,
                 output_target_num=None,
                 selected_features="auto",
                 backend="torch",
                 logs_dir="/tmp/autots_estimator",
                 cpus_per_trial=1,
                 name="autots_estimator"):
        """
        AutoTSEstimator trains a model for time series forecasting.
        Users can choose one of the built-in models, or pass in a customized pytorch or keras model
        for tuning using AutoML.

        :param model: a string or a model creation function.
               A string indicates a built-in model, currently "lstm", "tcn" are supported.
               A model creation function indicates a 3rd party model, the function should take a
               config param and return a torch.nn.Module (backend="torch") / tf model
               (backend="keras").
               If you use chronos.data.TSDataset as data input, the 3rd party
               should have 3 dim input (num_sample, past_seq_len, input_feature_num) and 3 dim
               output (num_sample, future_seq_len, output_feature_num) and use the same key
               in the model creation function. If you use a customized data creator, the output of
               data creator should fit the input of model creation function.
        :param search_space: hyper parameter configurations. Read the API docs for each auto model.
               Some common hyper parameter can be explicitly set in named parameter. search_space
               should contain those parameters other than the keyword arguments in this
               constructor in its key. It defaults to None, which means an empty dict. The
               given dict is copied and never modified.
        :param metric: String. The evaluation metric name to optimize. e.g. "mse"
        :param loss: String or pytorch/tf.keras loss instance or pytorch loss creator function. The
               default loss function for pytorch backend is nn.MSELoss().
        :param optimizer: String or pyTorch optimizer creator function or
               tf.keras optimizer instance.
        :param past_seq_len: Int or hp sampling function. The number of historical steps (i.e.
               lookback) used for forecasting. For hp sampling, see zoo.orca.automl.hp for more
               details. The values defaults to 2.
        :param future_seq_len: Int. The number of future steps to forecast. The value defaults
               to 1.
        :param input_feature_num: Int. The number of features in the input. The value is ignored if
               you use chronos.data.TSDataset as input data type.
        :param output_target_num: Int. The number of targets in the output. The value is ignored if
               you use chronos.data.TSDataset as input data type.
        :param selected_features: String. "all" and "auto" are supported for now. For "all",
               all features that are generated are used for each trial. For "auto", a subset
               is sampled randomly from all features for each trial. The parameter is ignored
               if not using chronos.data.TSDataset as input data type. The value defaults
               to "auto".
        :param backend: The backend of the auto model. We only support backend as "torch" for now.
        :param logs_dir: Local directory to save logs and results.
               It defaults to "/tmp/autots_estimator"
        :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
        :param name: name of the autots estimator. It defaults to "autots_estimator".
        :raises ValueError: if `backend` is anything other than "torch".
        """
        # check backend and set default loss
        if backend != "torch":
            raise ValueError(
                f"We only support backend as torch. Got {backend}")
        else:
            import torch
            if loss is None:
                loss = torch.nn.MSELoss()

        # Work on a private copy: the dict is updated below, and a shared default
        # (or the caller's own dict) must not collect settings across instances.
        search_space = {} if search_space is None else dict(search_space)

        if isinstance(model, types.FunctionType) and backend == "torch":
            # pytorch 3rd party model
            from zoo.orca.automl.auto_estimator import AutoEstimator
            self.model = AutoEstimator.from_torch(
                model_creator=model,
                optimizer=optimizer,
                loss=loss,
                logs_dir=logs_dir,
                resources_per_trial={"cpu": cpus_per_trial},
                name=name)
            self.metric = metric
            search_space.update({
                "past_seq_len": past_seq_len,
                "future_seq_len": future_seq_len,
                "input_feature_num": input_feature_num,
                "output_feature_num": output_target_num
            })
            self.search_space = search_space

        if isinstance(model, str):
            # built-in model
            # update auto model common search space
            search_space.update({
                "past_seq_len": past_seq_len,
                "future_seq_len": future_seq_len,
                "input_feature_num": input_feature_num,
                "output_target_num": output_target_num,
                "loss": loss,
                "metric": metric,
                "optimizer": optimizer,
                "backend": backend,
                "logs_dir": logs_dir,
                "cpus_per_trial": cpus_per_trial,
                "name": name
            })

            # create auto model from name
            self.model = AutoModelFactory.create_auto_model(
                name=model, search_space=search_space)

        # NOTE(review): a `model` that is neither a plain function nor a string
        # (e.g. a functools.partial) leaves `self.model` unset -- confirm intended.

        # save selected features setting for data creator generation
        self.selected_features = selected_features
        self._scaler = None
        self._scaler_index = None
Пример #21
0
class AutoProphet:

    def __init__(self,
                 changepoint_prior_scale=hp.loguniform(0.001, 0.5),
                 seasonality_prior_scale=hp.loguniform(0.01, 10),
                 holidays_prior_scale=hp.loguniform(0.01, 10),
                 seasonality_mode=hp.choice(['additive', 'multiplicative']),
                 changepoint_range=hp.uniform(0.8, 0.95),
                 metric='mse',
                 logs_dir="/tmp/auto_prophet_logs",
                 cpus_per_trial=1,
                 name="auto_prophet",
                 remote_dir=None,
                 **prophet_config
                 ):
        """
        Create an automated Prophet Model.
        Either an exact value or a search space can be given for each of the
        Prophet model hyperparameters. For details of the Prophet model hyperparameters, refer to
        https://facebook.github.io/prophet/docs/diagnostics.html#hyperparameter-tuning.

        :param changepoint_prior_scale: value or hp sampling function for hyperparameter
            changepoint_prior_scale for the Prophet model.
            For hp sampling, see zoo.chronos.orca.automl.hp for more details.
            e.g. hp.loguniform(0.001, 0.5).
        :param seasonality_prior_scale: hyperparameter seasonality_prior_scale for the
            Prophet model.
            e.g. hp.loguniform(0.01, 10).
        :param holidays_prior_scale: hyperparameter holidays_prior_scale for the
            Prophet model.
            e.g. hp.loguniform(0.01, 10).
        :param seasonality_mode: hyperparameter seasonality_mode for the
            Prophet model.
            e.g. hp.choice(['additive', 'multiplicative']).
        :param changepoint_range: hyperparameter changepoint_range for the
            Prophet model.
            e.g. hp.uniform(0.8, 0.95).
        :param metric: String. The evaluation metric name to optimize. e.g. "mse"
        :param logs_dir: Local directory to save logs and results. It defaults to
            "/tmp/auto_prophet_logs"
        :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
        :param name: name of the AutoProphet. It defaults to "auto_prophet"
        :param remote_dir: String. Remote directory to sync training results and checkpoints. It
            defaults to None and doesn't take effects while running in local. While running in
            cluster, it defaults to "hdfs:///tmp/{name}".

        :param prophet_config: Other Prophet hyperparameters. They are merged into the
            search space and win over the named hyperparameters on key collision.
        """
        named_hyperparameters = {
            "changepoint_prior_scale": changepoint_prior_scale,
            "seasonality_prior_scale": seasonality_prior_scale,
            "holidays_prior_scale": holidays_prior_scale,
            "seasonality_mode": seasonality_mode,
            "changepoint_range": changepoint_range,
        }
        # entries of prophet_config come last so that they override the named ones
        self.search_space = {**named_hyperparameters, **prophet_config}
        self.metric = metric

        trial_resources = {"cpu": cpus_per_trial}
        self.auto_est = AutoEstimator(
            model_builder=ProphetBuilder(),
            logs_dir=logs_dir,
            resources_per_trial=trial_resources,
            remote_dir=remote_dir,
            name=name,
        )

    def fit(self,
            data,
            cross_validation=True,
            expect_horizon=None,
            freq=None,
            metric_threshold=None,
            n_sampling=50,
            search_alg=None,
            search_alg_params=None,
            scheduler=None,
            scheduler_params=None,
            ):
        """
        Automatically fit the model and search for the best hyperparameters.

        After the search, a new Prophet model is built from the best config and fitted
        on the whole `data`; it is stored in `self.best_model`.

        :param data: training data, a pandas dataframe with Td rows,
               and 2 columns, with column 'ds' indicating date and column 'y' indicating value
               and Td is the time dimension
        :param cross_validation: bool, if the eval result comes from cross_validation.
               The value is set to True by default. Setting this option to False to
               speed up the process.
        :param expect_horizon: int, validation data will be automatically split from training
               data, and expect_horizon is the horizon you may need to use once the model is
               fitted. The value defaults to None, where 10% of training data (at least one
               record) will be taken as the validation data.
        :param freq: the frequency of the training dataframe. The frequency can be anything
               from the pandas list of frequency strings here:
               https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
               Defaulted to None, where the frequency is inferred from the difference of the
               first two records of the "ds" column, which may be unreliable.
        :param metric_threshold: a trial will be terminated when metric threshold is met
        :param n_sampling: Number of times to sample from the search_space. Defaults to 50.
               If hp.grid_search is in search_space, the grid will be repeated n_sampling of times.
               If this is -1, (virtually) infinite samples are generated
               until a stopping condition is met.
        :param search_alg: str, all supported searcher provided by ray tune
               (i.e."variant_generator", "random", "ax", "dragonfly", "skopt",
               "hyperopt", "bayesopt", "bohb", "nevergrad", "optuna", "zoopt" and
               "sigopt")
        :param search_alg_params: extra parameters for searcher algorithm besides search_space,
               metric and searcher mode
        :param scheduler: str, all supported scheduler provided by ray tune
        :param scheduler_params: parameters for scheduler
        """
        if expect_horizon is None:
            # int() truncates, so fewer than 10 records would give a horizon of 0, i.e.
            # an empty validation split / zero-length horizon; keep at least one step
            expect_horizon = max(1, int(0.1 * len(data)))
        if freq is None:
            assert len(data) >= 2, "The training dataframe should contain at least 2 records."
            assert pd.api.types.is_datetime64_any_dtype(data["ds"].dtypes), \
                "The \"ds\" col should be in datetime 64 type, or you need to set `freq` in fit."
            # infer the step from the first two timestamps
            self._freq = data["ds"].iloc[1] - data["ds"].iloc[0]
        else:
            # NOTE(review): pd.Timedelta appears to need a numbered string such as "1D";
            # a bare alias like "D" may be rejected -- confirm against supported inputs
            self._freq = pd.Timedelta(freq)
        expect_horizon_str = str(self._freq * expect_horizon)
        self.search_space.update({"expect_horizon": expect_horizon_str,
                                  "cross_validation": cross_validation})
        # without cross validation the last `expect_horizon` records are held out
        train_data = data if cross_validation else data[:len(data)-expect_horizon]
        validation_data = None if cross_validation else data[len(data)-expect_horizon:]
        self.auto_est.fit(data=train_data,
                          validation_data=validation_data,
                          metric=self.metric,
                          metric_threshold=metric_threshold,
                          n_sampling=n_sampling,
                          search_space=self.search_space,
                          search_alg=search_alg,
                          search_alg_params=search_alg_params,
                          scheduler=scheduler,
                          scheduler_params=scheduler_params
                          )
        # use the best config to fit a new prophet model on whole data
        self.best_model = ProphetBuilder().build(self.auto_est.get_best_config())
        self.best_model.model.fit(data)

    def predict(self, horizon=1, freq="D", ds_data=None):
        """
        Forecast with the best model obtained from the hyperparameter search.

        :param horizon: number of future steps to forecast. Defaults to 1.
        :param freq: frequency of the forecast dataframe. Defaults to day ("D"); any
               pandas offset alias can be used, see
               https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
        :param ds_data: a dataframe with a single 'ds' column holding dates.
        :return: the value returned by the best model's ``predict``.
        :raises RuntimeError: if the best model's underlying model is still None.
        """
        best = self.best_model
        if best.model is not None:
            return best.predict(horizon=horizon, freq=freq, ds_data=ds_data)
        raise RuntimeError(
            "You must call fit or restore first before calling predict!")

    def evaluate(self, data, metrics=None):
        """
        Evaluate using the best model after HPO.

        :param data: evaluation data, a pandas dataframe with Td rows,
            and 2 columns, with column 'ds' indicating date and column 'y' indicating value
            and Td is the time dimension
        :param metrics: A list of metric names for the test/valid data.
            When omitted (or None) a fresh ['mse'] list is used.
        :return: the value returned by the best model's ``evaluate``.
        :raises ValueError: if data is None.
        :raises RuntimeError: if the best model's underlying model is still None.
        """
        if data is None:
            raise ValueError("Input invalid data of None")
        if self.best_model.model is None:
            raise RuntimeError(
                "You must call fit or restore first before calling evaluate!")
        # Build the default per call: a list literal in the signature is a single
        # shared object, so any downstream mutation would leak into later calls.
        if metrics is None:
            metrics = ['mse']
        return self.best_model.evaluate(target=data,
                                        metrics=metrics)

    def save(self, checkpoint_file):
        """
        Persist the best model obtained from the hyperparameter search.

        :param checkpoint_file: destination of the saved best model; should be a json file.
        :raises RuntimeError: if the best model's underlying model is still None.
        """
        holder = self.best_model
        not_ready = holder.model is None
        if not_ready:
            raise RuntimeError(
                "You must call fit or restore first before calling save!")
        holder.save(checkpoint_file)

    def restore(self, checkpoint_file):
        """
        Load a previously saved best model.

        The call is delegated to ``self.best_model.restore``.

        :param checkpoint_file: location of the checkpoint file to load the best model from.
        """
        holder = self.best_model
        holder.restore(checkpoint_file)

    def get_best_model(self):
        """
        Return the best Prophet model, i.e. the ``model`` attribute of ``self.best_model``.
        """
        holder = self.best_model
        return holder.model
Пример #22
0
class AutoTCN:
    """Hyperparameter-search wrapper that tunes a TCN model through an AutoEstimator."""

    def __init__(self,
                 input_feature_num,
                 output_target_num,
                 past_seq_len,
                 future_seq_len,
                 optimizer,
                 loss,
                 metric,
                 hidden_units=None,
                 levels=None,
                 num_channels=None,
                 kernel_size=7,
                 lr=0.001,
                 dropout=0.2,
                 backend="torch",
                 logs_dir="/tmp/auto_tcn",
                 cpus_per_trial=1,
                 name="auto_tcn"):
        """
        Create an AutoTCN.

        :param input_feature_num: Int. The number of features in the input
        :param output_target_num: Int. The number of targets in the output
        :param past_seq_len: Int. The number of historical steps used for forecasting.
        :param future_seq_len: Int. The number of future steps to forecast.
        :param optimizer: String or pyTorch optimizer creator function or
               tf.keras optimizer instance.
        :param loss: String or pytorch/tf.keras loss instance or pytorch loss creator function.
        :param metric: String. The evaluation metric name to optimize. e.g. "mse"
        :param hidden_units: Int or hp sampling function from an integer space. The number of hidden
               units or filters for each convolutional layer. It is similar to `units` for LSTM.
               Defaults to None here and is stored in the search space as `nhid`; the
               effective default (30 according to the original documentation) is applied
               outside this class. We will omit the hidden_units value if num_channels
               is specified.
               For hp sampling, see zoo.orca.automl.hp for more details.
               e.g. hp.grid_search([32, 64]).
        :param levels: Int or hp sampling function from an integer space. The number of levels of
               TemporalBlocks to use. Defaults to None here; the effective default (8 according
               to the original documentation) is applied outside this class. We will omit the
               levels value if num_channels is specified.
        :param num_channels: List of integers. A list of hidden_units for each level. You could
               specify num_channels if you want different hidden_units for different levels.
               By default, num_channels equals to
               [hidden_units] * (levels - 1) + [output_target_num].
        :param kernel_size: Int or hp sampling function from an integer space.
               The size of the kernel to use in each convolutional layer.
        :param lr: float or hp sampling function from a float space. Learning rate.
               e.g. hp.choice([0.001, 0.003, 0.01])
        :param dropout: float or hp sampling function from a float space. Dropout rate.
               e.g. hp.uniform(0.1, 0.3)
        :param backend: The backend of the TCN model. We only support backend as "torch" for now.
        :param logs_dir: Local directory to save logs and results. It defaults to "/tmp/auto_tcn"
        :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
        :param name: name of the AutoTCN. It defaults to "auto_tcn"
        :raises ValueError: if backend is anything other than "torch".
        """
        # todo: support search for past_seq_len.
        # todo: add input check.
        if backend != "torch":
            raise ValueError(
                f"We only support backend as torch. Got {backend}")
        # Note the key renames: output_target_num -> output_feature_num and
        # hidden_units -> nhid.
        self.search_space = dict(
            input_feature_num=input_feature_num,
            output_feature_num=output_target_num,
            past_seq_len=past_seq_len,
            future_seq_len=future_seq_len,
            nhid=hidden_units,
            levels=levels,
            num_channels=num_channels,
            kernel_size=kernel_size,
            lr=lr,
            dropout=dropout,
        )
        self.metric = metric
        model_builder = PytorchModelBuilder(
            model_creator=model_creator,
            optimizer_creator=optimizer,
            loss_creator=loss,
        )
        self.auto_est = AutoEstimator(
            model_builder=model_builder,
            logs_dir=logs_dir,
            resources_per_trial={"cpu": cpus_per_trial},
            name=name)

    def fit(
        self,
        data,
        epochs=1,
        batch_size=32,
        validation_data=None,
        metric_threshold=None,
        n_sampling=1,
        search_alg=None,
        search_alg_params=None,
        scheduler=None,
        scheduler_params=None,
    ):
        """
        Automatically fit the model and search for the best hyper parameters.

        :param data: train data.
               Data can be a tuple of ndarrays or a function that takes a
               config dictionary as parameter and returns a PyTorch DataLoader.
               If data is a tuple of ndarrays, it should be in the form of (x, y),
               where x is training input data and y is training target data.
        :param epochs: Max number of epochs to train in each trial. Defaults to 1.
               If you have also set metric_threshold, a trial will stop if either it has been
               optimized to the metric_threshold or it has been trained for {epochs} epochs.
        :param batch_size: Int or hp sampling function from an integer space. Training batch size.
               It defaults to 32.
        :param validation_data: Validation data. Validation data type should be the same as data.
        :param metric_threshold: a trial will be terminated when metric threshold is met
        :param n_sampling: Number of times to sample from the search_space. Defaults to 1.
               If hp.grid_search is in search_space, the grid will be repeated n_sampling of times.
               If this is -1, (virtually) infinite samples are generated
               until a stopping condition is met.
        :param search_alg: str, all supported searcher provided by ray tune
               (i.e."variant_generator", "random", "ax", "dragonfly", "skopt",
               "hyperopt", "bayesopt", "bohb", "nevergrad", "optuna", "zoopt" and
               "sigopt")
        :param search_alg_params: extra parameters for searcher algorithm besides search_space,
               metric and searcher mode
        :param scheduler: str, all supported scheduler provided by ray tune
        :param scheduler_params: parameters for scheduler

        :return: None. The search results are kept by the underlying AutoEstimator.
        """
        # batch_size is tuned like any other hyperparameter, so it lives in the
        # search space rather than being passed to the estimator directly.
        self.search_space["batch_size"] = batch_size
        self.auto_est.fit(
            data=data,
            epochs=epochs,
            validation_data=validation_data,
            metric=self.metric,
            metric_threshold=metric_threshold,
            n_sampling=n_sampling,
            search_space=self.search_space,
            search_alg=search_alg,
            search_alg_params=search_alg_params,
            scheduler=scheduler,
            scheduler_params=scheduler_params,
        )

    def get_best_model(self):
        """
        Get the best tcn model.
        """
        return self.auto_est.get_best_model()

    def get_best_config(self):
        """
        Get the best configuration

        :return: the value returned by the underlying AutoEstimator's ``get_best_config``.
        """
        return self.auto_est.get_best_config()
Пример #23
0
class AutoProphet:
    """Hyperparameter-search wrapper that tunes a Prophet model through an AutoEstimator."""

    def __init__(self,
                 changepoint_prior_scale=0.05,
                 seasonality_prior_scale=10.0,
                 holidays_prior_scale=10.0,
                 seasonality_mode='additive',
                 changepoint_range=0.8,
                 metric='mse',
                 logs_dir="/tmp/auto_prophet_logs",
                 cpus_per_trial=1,
                 name="auto_prophet",
                 **prophet_config):
        """
        Create an automated Prophet Model.
        User need to specify either the exact value or the search space of the
        Prophet model hyperparameters. For details of the Prophet model hyperparameters, refer to
        https://facebook.github.io/prophet/docs/diagnostics.html#hyperparameter-tuning.

        :param changepoint_prior_scale: float or hp sampling function from a float space
            for hyperparameter changepoint_prior_scale for the Prophet model.
            For hp sampling, see zoo.chronos.orca.automl.hp for more details.
            e.g. hp.loguniform(0.001, 0.5).
        :param seasonality_prior_scale: hyperparameter seasonality_prior_scale for the
            Prophet model.
            e.g. hp.loguniform(0.01, 10).
        :param holidays_prior_scale: hyperparameter holidays_prior_scale for the
            Prophet model.
            e.g. hp.loguniform(0.01, 10).
        :param seasonality_mode: hyperparameter seasonality_mode for the
            Prophet model.
            e.g. hp.choice(['additive', 'multiplicative']).
        :param changepoint_range: hyperparameter changepoint_range for the
            Prophet model.
            e.g. hp.uniform(0.8, 0.95).
        :param metric: String. The evaluation metric name to optimize. e.g. "mse"
        :param logs_dir: Local directory to save logs and results. It defaults to
            "/tmp/auto_prophet_logs"
        :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
        :param name: name of the AutoProphet. It defaults to "auto_prophet"
        :param prophet_config: Other Prophet hyperparameters. They are added to the
            search space next to the named hyperparameters above.
        """
        self.search_space = {
            "changepoint_prior_scale": changepoint_prior_scale,
            "seasonality_prior_scale": seasonality_prior_scale,
            "holidays_prior_scale": holidays_prior_scale,
            # Use the caller's value / sampling function; this entry used to be
            # pinned to 'additive', which silently discarded the argument.
            "seasonality_mode": seasonality_mode,
            "changepoint_range": changepoint_range,
        }
        # Extra hyperparameters were previously accepted and then dropped. They
        # cannot clash with the keys above because those names are bound to the
        # explicit parameters of this constructor.
        self.search_space.update(prophet_config)
        self.metric = metric
        model_builder = ProphetBuilder()
        self.auto_est = AutoEstimator(
            model_builder=model_builder,
            logs_dir=logs_dir,
            resources_per_trial={"cpu": cpus_per_trial},
            name=name)

    def fit(
        self,
        data,
        epochs=1,
        validation_data=None,
        metric_threshold=None,
        n_sampling=1,
        search_alg=None,
        search_alg_params=None,
        scheduler=None,
        scheduler_params=None,
    ):
        """
        Automatically fit the model and search for the best hyperparameters.

        :param data: Training data, passed unchanged to the underlying AutoEstimator.
               NOTE(review): previously documented as a 1-D numpy array; a dataframe with
               'ds' and 'y' columns looks more likely for Prophet - confirm.
        :param epochs: Accepted for signature compatibility only; this method does not
               forward it to the underlying AutoEstimator.
        :param validation_data: Validation data, same type as data.
        :param metric_threshold: a trial will be terminated when metric threshold is met
        :param n_sampling: Number of times to sample from the search_space. Defaults to 1.
               If hp.grid_search is in search_space, the grid will be repeated n_sampling of times.
               If this is -1, (virtually) infinite samples are generated
               until a stopping condition is met.
        :param search_alg: str, all supported searcher provided by ray tune
               (i.e."variant_generator", "random", "ax", "dragonfly", "skopt",
               "hyperopt", "bayesopt", "bohb", "nevergrad", "optuna", "zoopt" and
               "sigopt")
        :param search_alg_params: extra parameters for searcher algorithm besides search_space,
               metric and searcher mode
        :param scheduler: str, all supported scheduler provided by ray tune
        :param scheduler_params: parameters for scheduler
        """
        self.auto_est.fit(data=data,
                          validation_data=validation_data,
                          metric=self.metric,
                          metric_threshold=metric_threshold,
                          n_sampling=n_sampling,
                          search_space=self.search_space,
                          search_alg=search_alg,
                          search_alg_params=search_alg_params,
                          scheduler=scheduler,
                          scheduler_params=scheduler_params)

    def get_best_model(self):
        """
        Get the best Prophet model.
        """
        return self.auto_est.get_best_model()
Пример #24
0
    def __init__(self,
                 input_feature_num,
                 output_target_num,
                 past_seq_len,
                 future_seq_len,
                 optimizer,
                 loss,
                 metric,
                 lr=0.001,
                 lstm_hidden_dim=128,
                 lstm_layer_num=2,
                 dropout=0.25,
                 teacher_forcing=False,
                 backend="torch",
                 logs_dir="/tmp/auto_seq2seq",
                 cpus_per_trial=1,
                 name="auto_seq2seq"):
        """
        Set up the search space and the AutoEstimator for an AutoSeq2Seq.

        :param input_feature_num: Int. Number of input features.
        :param output_target_num: Int. Number of output targets; stored in the search
               space under the key `output_feature_num`.
        :param past_seq_len: Int. Number of historical steps fed to the model.
        :param future_seq_len: Int. Number of future steps to forecast.
        :param optimizer: String or pyTorch optimizer creator function or
               tf.keras optimizer instance.
        :param loss: String or pytorch/tf.keras loss instance or pytorch loss creator function.
        :param metric: String. Name of the evaluation metric to optimize, e.g. "mse".
        :param lr: float or hp sampling function from a float space. Learning rate,
               e.g. hp.choice([0.001, 0.003, 0.01]).
        :param lstm_hidden_dim: LSTM hidden channel for decoder and encoder,
               e.g. hp.grid_search([32, 64, 128]).
        :param lstm_layer_num: LSTM layer number for decoder and encoder,
               e.g. hp.grid_search([1, 4]).
        :param dropout: float or hp sampling function from a float space. Dropout rate,
               e.g. hp.uniform(0.1, 0.3).
        :param teacher_forcing: whether to use teacher forcing in training.
        :param backend: backend of the Seq2Seq model. Only "torch" is accepted.
        :param logs_dir: local directory for logs and results. Defaults to
               "/tmp/auto_seq2seq".
        :param cpus_per_trial: Int. Number of cpus for each trial. Defaults to 1.
        :param name: name of the AutoSeq2Seq. Defaults to "auto_seq2seq".
        :raises ValueError: if backend is anything other than "torch".
        """
        # todo: support search for past_seq_len.
        # todo: add input check.
        if backend != "torch":
            raise ValueError(
                f"We only support backend as torch. Got {backend}")
        self.search_space = {
            "input_feature_num": input_feature_num,
            "output_feature_num": output_target_num,
            "past_seq_len": past_seq_len,
            "future_seq_len": future_seq_len,
            "lstm_hidden_dim": lstm_hidden_dim,
            "lstm_layer_num": lstm_layer_num,
            "lr": lr,
            "dropout": dropout,
            "teacher_forcing": teacher_forcing,
        }
        self.metric = metric
        builder = PytorchModelBuilder(model_creator=model_creator,
                                      optimizer_creator=optimizer,
                                      loss_creator=loss)
        trial_resources = {"cpu": cpus_per_trial}
        self.auto_est = AutoEstimator(model_builder=builder,
                                      logs_dir=logs_dir,
                                      resources_per_trial=trial_resources,
                                      name=name)
Пример #25
0
class AutoSeq2Seq:
    def __init__(self,
                 input_feature_num,
                 output_target_num,
                 past_seq_len,
                 future_seq_len,
                 optimizer,
                 loss,
                 metric,
                 lr=0.001,
                 lstm_hidden_dim=128,
                 lstm_layer_num=2,
                 dropout=0.25,
                 teacher_forcing=False,
                 backend="torch",
                 logs_dir="/tmp/auto_seq2seq",
                 cpus_per_trial=1,
                 name="auto_seq2seq"):
        """
        Set up the search space and the AutoEstimator for an AutoSeq2Seq.

        :param input_feature_num: Int. Number of input features.
        :param output_target_num: Int. Number of output targets; stored in the search
               space under the key `output_feature_num`.
        :param past_seq_len: Int. Number of historical steps fed to the model.
        :param future_seq_len: Int. Number of future steps to forecast.
        :param optimizer: String or pyTorch optimizer creator function or
               tf.keras optimizer instance.
        :param loss: String or pytorch/tf.keras loss instance or pytorch loss creator function.
        :param metric: String. Name of the evaluation metric to optimize, e.g. "mse".
        :param lr: float or hp sampling function from a float space. Learning rate,
               e.g. hp.choice([0.001, 0.003, 0.01]).
        :param lstm_hidden_dim: LSTM hidden channel for decoder and encoder,
               e.g. hp.grid_search([32, 64, 128]).
        :param lstm_layer_num: LSTM layer number for decoder and encoder,
               e.g. hp.grid_search([1, 4]).
        :param dropout: float or hp sampling function from a float space. Dropout rate,
               e.g. hp.uniform(0.1, 0.3).
        :param teacher_forcing: whether to use teacher forcing in training.
        :param backend: backend of the Seq2Seq model. Only "torch" is accepted.
        :param logs_dir: local directory for logs and results. Defaults to
               "/tmp/auto_seq2seq".
        :param cpus_per_trial: Int. Number of cpus for each trial. Defaults to 1.
        :param name: name of the AutoSeq2Seq. Defaults to "auto_seq2seq".
        :raises ValueError: if backend is anything other than "torch".
        """
        # todo: support search for past_seq_len.
        # todo: add input check.
        if backend != "torch":
            raise ValueError(
                f"We only support backend as torch. Got {backend}")
        self.search_space = {
            "input_feature_num": input_feature_num,
            "output_feature_num": output_target_num,
            "past_seq_len": past_seq_len,
            "future_seq_len": future_seq_len,
            "lstm_hidden_dim": lstm_hidden_dim,
            "lstm_layer_num": lstm_layer_num,
            "lr": lr,
            "dropout": dropout,
            "teacher_forcing": teacher_forcing,
        }
        self.metric = metric
        builder = PytorchModelBuilder(model_creator=model_creator,
                                      optimizer_creator=optimizer,
                                      loss_creator=loss)
        trial_resources = {"cpu": cpus_per_trial}
        self.auto_est = AutoEstimator(model_builder=builder,
                                      logs_dir=logs_dir,
                                      resources_per_trial=trial_resources,
                                      name=name)

    def fit(
        self,
        data,
        epochs=1,
        batch_size=32,
        validation_data=None,
        metric_threshold=None,
        n_sampling=1,
        search_alg=None,
        search_alg_params=None,
        scheduler=None,
        scheduler_params=None,
    ):
        """
        Run the hyperparameter search and fit the model.

        :param data: train data, either a tuple of ndarrays in the form (x, y), where x is
               the training input and y the training target, or a function that takes a
               config dictionary and returns a PyTorch DataLoader.
        :param epochs: Max number of epochs to train in each trial. Defaults to 1.
               When metric_threshold is also set, a trial stops as soon as either the
               threshold is reached or {epochs} epochs have been trained.
        :param batch_size: Int or hp sampling function from an integer space. Training
               batch size. Defaults to 32. It is stored in the search space.
        :param validation_data: Validation data, of the same type as data.
        :param metric_threshold: a trial is terminated once this metric threshold is met.
        :param n_sampling: Number of times to sample from the search_space. Defaults to 1.
               With hp.grid_search in the search_space the grid is repeated n_sampling times.
               -1 means (virtually) infinite samples until a stopping condition is met.
        :param search_alg: str, any searcher supported by ray tune
               (i.e."variant_generator", "random", "ax", "dragonfly", "skopt",
               "hyperopt", "bayesopt", "bohb", "nevergrad", "optuna", "zoopt" and
               "sigopt")
        :param search_alg_params: extra parameters for the searcher besides search_space,
               metric and searcher mode.
        :param scheduler: str, any scheduler supported by ray tune.
        :param scheduler_params: parameters for the scheduler.

        :return: None.
        """
        self.search_space["batch_size"] = batch_size
        search_kwargs = dict(
            data=data,
            epochs=epochs,
            validation_data=validation_data,
            metric=self.metric,
            metric_threshold=metric_threshold,
            n_sampling=n_sampling,
            search_space=self.search_space,
            search_alg=search_alg,
            search_alg_params=search_alg_params,
            scheduler=scheduler,
            scheduler_params=scheduler_params,
        )
        self.auto_est.fit(**search_kwargs)

    def get_best_model(self):
        """
        Return the best Seq2Seq model reported by the underlying AutoEstimator.
        """
        estimator = self.auto_est
        return estimator.get_best_model()

    def get_best_config(self):
        """
        Return the best configuration reported by the underlying AutoEstimator.

        :return: A dictionary of best hyper parameters
        """
        estimator = self.auto_est
        return estimator.get_best_config()

    def _get_best_automl_model(self):
        # Thin pass-through to the AutoEstimator's private accessor.
        estimator = self.auto_est
        return estimator._get_best_automl_model()
Пример #26
0
    def __init__(self,
                 p=2,
                 q=2,
                 seasonal=True,
                 P=1,
                 Q=1,
                 m=7,
                 metric='mse',
                 logs_dir="/tmp/auto_arima_logs",
                 cpus_per_trial=1,
                 name="auto_arima",
                 **arima_config):
        """
        Set up the search space and the AutoEstimator for an automated ARIMA model.

        Each of p, q, seasonal, P, Q and m is either an exact value or an hp sampling
        function (see zoo.chronos.orca.automl.hp). For the meaning of the ARIMA
        hyperparameters refer to
        https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.ARIMA.html#pmdarima.arima.ARIMA.

        :param p: Int or hp sampling function from an integer space for hyperparameter p
               of the ARIMA model, e.g. hp.randint(0, 3).
        :param q: Int or hp sampling function from an integer space for hyperparameter q
               of the ARIMA model, e.g. hp.randint(0, 3).
        :param seasonal: Bool or hp sampling function for whether to add seasonal
               components to the ARIMA model, e.g. hp.choice([True, False]).
        :param P: Int or hp sampling function from an integer space for hyperparameter P
               of the ARIMA model, e.g. hp.randint(0, 3).
        :param Q: Int or hp sampling function from an integer space for hyperparameter Q
               of the ARIMA model, e.g. hp.randint(0, 3).
        :param m: Int or hp sampling function from an integer space for hyperparameter m
               of the ARIMA model, e.g. hp.choice([4, 7, 12, 24, 365]).
        :param metric: String. Name of the evaluation metric to optimize, e.g. "mse".
        :param logs_dir: local directory for logs and results. Defaults to
               "/tmp/auto_arima_logs".
        :param cpus_per_trial: Int. Number of cpus for each trial. Defaults to 1.
        :param name: name of the AutoARIMA. Defaults to "auto_arima".
        :param arima_config: Other ARIMA hyperparameters. They are accepted but not
               read by this constructor.

        """
        self.search_space = dict(p=p, q=q, seasonal=seasonal, P=P, Q=Q, m=m)
        self.metric = metric
        builder = ARIMABuilder()
        trial_resources = {"cpu": cpus_per_trial}
        self.auto_est = AutoEstimator(model_builder=builder,
                                      logs_dir=logs_dir,
                                      resources_per_trial=trial_resources,
                                      name=name)
Пример #27
0
class AutoARIMA:
    """
    Hyperparameter-search front end for an ARIMA model.

    The constructor records the search space and creates an AutoEstimator around an
    ARIMABuilder; ``fit`` delegates the search to that estimator and ``get_best_model``
    returns whatever the estimator selected.
    """

    def __init__(self,
                 p=2,
                 q=2,
                 seasonal=True,
                 P=1,
                 Q=1,
                 m=7,
                 metric='mse',
                 logs_dir="/tmp/auto_arima_logs",
                 cpus_per_trial=1,
                 name="auto_arima",
                 **arima_config):
        """
        Create an automated ARIMA Model

        :param p: Int or hp sampling function from an integer space for hyperparameter p
               of the ARIMA model, for details you may refer to
               https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.ARIMA.html#pmdarima.arima.ARIMA.
               For hp sampling, see zoo.orca.automl.hp for more details.
               e.g. hp.randint(0, 3).
        :param q: Int or hp sampling function from an integer space for hyperparameter q
               of the ARIMA model, for details you may refer to
               https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.ARIMA.html#pmdarima.arima.ARIMA.
               e.g. hp.randint(0, 3).
        :param seasonal: Bool or hp sampling function over booleans for whether to add
               seasonal components to the ARIMA model, for details you may refer to
               https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.ARIMA.html#pmdarima.arima.ARIMA.
               e.g. hp.choice([True, False]).
        :param P: Int or hp sampling function from an integer space for hyperparameter P
               of the ARIMA model, for details you may refer to
               https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.ARIMA.html#pmdarima.arima.ARIMA.
               For hp sampling, see zoo.orca.automl.hp for more details.
               e.g. hp.randint(0, 3).
        :param Q: Int or hp sampling function from an integer space for hyperparameter Q
               of the ARIMA model, for details you may refer to
               https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.ARIMA.html#pmdarima.arima.ARIMA.
               e.g. hp.randint(0, 3).
        :param m: Int or hp sampling function from an integer space for hyperparameter m
               (the seasonal period) of the ARIMA model, for details you may refer to
               https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.ARIMA.html#pmdarima.arima.ARIMA.
               e.g. hp.choice([4, 7, 12, 24, 365]).
        :param metric: String. The evaluation metric name to optimize. e.g. "mse"
        :param logs_dir: Local directory to save logs and results. It defaults to
               "/tmp/auto_arima_logs"
        :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
        :param name: name of the AutoARIMA. It defaults to "auto_arima"
        :param arima_config: Other ARIMA hyperparameters. They are added to the search
               space unchanged; which names the underlying model honours depends on
               ARIMABuilder (not visible here). You may refer to
               https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.ARIMA.html#pmdarima.arima.ARIMA
               for the parameter names to specify.

        """
        self.search_space = {
            "p": p,
            "q": q,
            "seasonal": seasonal,
            "P": P,
            "Q": Q,
            "m": m,
        }
        # Extra keyword hyperparameters used to be discarded silently even though they
        # are documented; merge them so they actually reach each trial's config.
        # They cannot clash with the named keys above, which Python binds first.
        self.search_space.update(arima_config)
        self.metric = metric
        model_builder = ARIMABuilder()
        self.auto_est = AutoEstimator(
            model_builder=model_builder,
            logs_dir=logs_dir,
            resources_per_trial={"cpu": cpus_per_trial},
            name=name)

    def fit(
        self,
        data,
        epochs=1,
        validation_data=None,
        metric_threshold=None,
        n_sampling=1,
        search_alg=None,
        search_alg_params=None,
        scheduler=None,
        scheduler_params=None,
    ):
        """
        Automatically fit the model and search for the best hyperparameters.

        All arguments are forwarded to the underlying AutoEstimator together with the
        metric and search space recorded by the constructor.

        :param data: Training data, A 1-D numpy array.
        :param epochs: Max number of epochs to train in each trial. Defaults to 1.
               If you have also set metric_threshold, a trial will stop if either it has been
               optimized to the metric_threshold or it has been trained for {epochs} epochs.
        :param validation_data: Validation data. A 1-D numpy array.
        :param metric_threshold: a trial will be terminated when metric threshold is met
        :param n_sampling: Number of times to sample from the search_space. Defaults to 1.
               If hp.grid_search is in search_space, the grid will be repeated n_sampling of times.
               If this is -1, (virtually) infinite samples are generated
               until a stopping condition is met.
        :param search_alg: str, all supported searcher provided by ray tune
               (i.e."variant_generator", "random", "ax", "dragonfly", "skopt",
               "hyperopt", "bayesopt", "bohb", "nevergrad", "optuna", "zoopt" and
               "sigopt")
        :param search_alg_params: extra parameters for searcher algorithm besides search_space,
               metric and searcher mode
        :param scheduler: str, all supported scheduler provided by ray tune
        :param scheduler_params: parameters for scheduler
        """
        # `epochs` was previously accepted and documented but never passed on, so any
        # caller-supplied value was ignored; forward it explicitly.
        self.auto_est.fit(data=data,
                          epochs=epochs,
                          validation_data=validation_data,
                          metric=self.metric,
                          metric_threshold=metric_threshold,
                          n_sampling=n_sampling,
                          search_space=self.search_space,
                          search_alg=search_alg,
                          search_alg_params=search_alg_params,
                          scheduler=scheduler,
                          scheduler_params=scheduler_params)

    def get_best_model(self):
        """
        Get the best arima model.

        :return: the object returned by the underlying AutoEstimator's get_best_model().
        """
        return self.auto_est.get_best_model()
Пример #28
0
    def __init__(
        self,
        input_feature_num,
        output_target_num,
        past_seq_len,
        future_seq_len,
        optimizer,
        loss,
        metric,
        hidden_units=None,
        levels=None,
        num_channels=None,
        kernel_size=7,
        lr=0.001,
        dropout=0.2,
        backend="torch",
        logs_dir="/tmp/auto_tcn",
        cpus_per_trial=1,
        name="auto_tcn",
        remote_dir=None,
    ):
        """
        Set up an AutoTCN: record the search space and build the AutoEstimator.

        :param input_feature_num: Int. Number of input features.
        :param output_target_num: Int. Number of output targets. Stored in the search
               space under the key "output_feature_num".
        :param past_seq_len: Int. Number of historical steps fed to the forecaster.
        :param future_seq_len: Int. Number of future steps to predict.
        :param optimizer: String, pyTorch optimizer creator function, or tf.keras
               optimizer instance. Passed to the model builder as optimizer_creator.
        :param loss: String, pytorch/tf.keras loss instance, or pytorch loss creator
               function. Passed to the model builder as loss_creator.
        :param metric: String. Name of the evaluation metric to optimize, e.g. "mse".
        :param hidden_units: Int or hp sampling function from an integer space. Hidden
               units (filters) per convolutional layer, comparable to `units` for LSTM.
               Stored under the key "nhid". The signature default is None; the upstream
               documentation cites an effective default of 30 (applied outside this
               method). Ignored when num_channels is given.
               For hp sampling, see zoo.orca.automl.hp, e.g. hp.grid_search([32, 64]).
        :param levels: Int or hp sampling function from an integer space. Number of
               TemporalBlock levels. The signature default is None; the upstream
               documentation cites an effective default of 8 (applied outside this
               method). Ignored when num_channels is given.
        :param num_channels: List of integers giving hidden_units per level, for when
               levels should differ. Documented default:
               [hidden_units] * (levels - 1) + [output_target_num].
        :param kernel_size: Int or hp sampling function from an integer space.
               Kernel size of each convolutional layer.
        :param lr: float or hp sampling function from a float space. Learning rate.
               e.g. hp.choice([0.001, 0.003, 0.01])
        :param dropout: float or hp sampling function from a float space. Dropout
               rate. e.g. hp.uniform(0.1, 0.3)
        :param backend: Backend of the TCN model. Only "torch" is accepted.
        :param logs_dir: Local directory for logs and results. Defaults to "/tmp/auto_tcn".
        :param cpus_per_trial: Int. Number of cpus for each trial. Defaults to 1.
        :param name: Name of the AutoTCN. Defaults to "auto_tcn".
        :param remote_dir: String. Remote directory to sync training results and
               checkpoints. Defaults to None, which has no effect when running locally;
               per the upstream documentation it becomes "hdfs:///tmp/{name}" on a cluster.
        :raises ValueError: if backend is anything other than "torch".
        """
        super().__init__()
        # todo: support search for past_seq_len.
        # todo: add input check.
        if backend != "torch":
            raise ValueError(f"We only support backend as torch. Got {backend}")

        # Key order is kept as in the keyword form; note the two renamed entries
        # ("output_feature_num" and "nhid").
        self.search_space = {
            "input_feature_num": input_feature_num,
            "output_feature_num": output_target_num,
            "past_seq_len": past_seq_len,
            "future_seq_len": future_seq_len,
            "nhid": hidden_units,
            "levels": levels,
            "num_channels": num_channels,
            "kernel_size": kernel_size,
            "lr": lr,
            "dropout": dropout,
        }
        self.metric = metric

        builder = PytorchModelBuilder(model_creator=model_creator,
                                      optimizer_creator=optimizer,
                                      loss_creator=loss)
        trial_resources = {"cpu": cpus_per_trial}
        self.auto_est = AutoEstimator(model_builder=builder,
                                      logs_dir=logs_dir,
                                      resources_per_trial=trial_resources,
                                      remote_dir=remote_dir,
                                      name=name)