Example #1
    def evaluate(self, target, data=None, metrics=['mse']):
        """
        Evaluate on the prediction results. We predict horizon time-points ahead of the input data
        in fit_eval before evaluation, where the horizon length equals the second dimension size of
        target.
        :param target: target for evaluation. A dataframe with 2 columns, where column 'ds'
               indicates the date and column 'y' indicates the target.
        :param data: Prophet predicts the horizon steps forward from the training data,
            so data should be None as it is not used.
        :param metrics: a list of metrics in string format
        :return: a list of metric evaluation results
        """
        if data is not None:
            raise ValueError("We don't support input data currently")
        if target is None:
            raise ValueError("Input invalid target of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling evaluate"
            )

        target_pred = self.model.predict(target)
        return [
            Evaluator.evaluate(m, target.y.values, target_pred.yhat.values)
            for m in metrics
        ]
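For context, a minimal usage sketch for the method above (the fitted `model` wrapper and the values are assumptions, not part of the snippet): the evaluation target is a DataFrame with a 'ds' date column and a 'y' value column covering the horizon to score.

import pandas as pd

# hypothetical 7-day horizon immediately following the training data
target = pd.DataFrame({
    "ds": pd.date_range("2021-01-01", periods=7, freq="D"),
    "y": [12.0, 13.5, 11.8, 12.2, 14.0, 13.1, 12.7],
})

# `model` is assumed to be a fitted wrapper exposing the evaluate method above
# mse, smape = model.evaluate(target, metrics=["mse", "smape"])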
Example #2
    def evaluate_with_onnx(self,
                           data,
                           metrics=['mse'],
                           multioutput="uniform_average",
                           batch_size=32):
        '''
        Evaluate the time series pipeline with onnx.

        :param data: data can be a TSDataset or a data creator (to be supported).
               The TSDataset should follow the same operations as the training
               TSDataset used in AutoTSEstimator.fit.
        :param metrics: list. A list of evaluation metric names, e.g. ["mse"].
        :param multioutput: Defines how multiple output values are aggregated.
               String in ['raw_values', 'uniform_average']. The value defaults to
               'uniform_average'.
        :param batch_size: prediction batch size; a smaller batch_size costs more
               time but less memory. The param is only effective when data is a
               TSDataset. The value defaults to 32.
        '''
        # predict with onnx
        x, y = self._tsdataset_to_numpy(data, is_predict=False)
        yhat = self._best_model.predict_with_onnx(x, batch_size=batch_size)
        # unscale both the prediction and the ground truth
        yhat = self._tsdataset_unscale(yhat)
        y = self._tsdataset_unscale(y)
        # evaluate
        eval_result = [
            Evaluator.evaluate(m,
                               y_true=y,
                               y_pred=yhat,
                               multioutput=multioutput) for m in metrics
        ]
        return eval_result
Example #3
    def test_evaluate_predict_future_more_1(self):
        target_col = "values"
        metrics = ["mse", "r2"]
        future_seq_len = np.random.randint(2, 6)
        train_df, test_df, tsp, test_sample_num = self.get_input_tsp(
            future_seq_len, target_col)
        pipeline = tsp.fit(train_df, test_df)
        mse, rs = pipeline.evaluate(test_df, metrics=metrics)
        assert len(mse) == future_seq_len
        assert len(rs) == future_seq_len
        y_pred = pipeline.predict(test_df)
        assert y_pred.shape == (test_sample_num - default_past_seq_len + 1,
                                future_seq_len + 1)

        y_pred_df = pipeline.predict(test_df[:-future_seq_len])
        columns = [
            "{}_{}".format(target_col, i) for i in range(future_seq_len)
        ]
        y_pred_value = y_pred_df[columns].values

        y_df = test_df[default_past_seq_len:]
        y_value = TimeSequenceFeatureTransformer()._roll_test(
            y_df[target_col], future_seq_len)

        mse_pred_eval, rs_pred_eval = [
            Evaluator.evaluate(m, y_value, y_pred_value) for m in metrics
        ]
        mse_eval, rs_eval = pipeline.evaluate(test_df, metrics)
        assert_array_almost_equal(mse_pred_eval, mse_eval, decimal=2)
        assert_array_almost_equal(rs_pred_eval, rs_eval, decimal=2)
Example #4
 def evaluate(self, df, metric=['mse']):
     """
     Evaluate on the input dataframe
     :param df: input dataframe containing the feature and target columns
     :param metric: a list of metrics in string format
     :return: a list of metric evaluation results
     """
     if isinstance(metric, str):
         metric = [metric]
     x, y = self._process_data(df, mode="val")
     y_pred = self.model.predict(x)
     y_unscale, y_pred_unscale = self.ft.post_processing(df,
                                                         y_pred,
                                                         is_train=True)
     if len(y_pred.shape) > 1 and y_pred.shape[1] == 1:
         multioutput = 'uniform_average'
     else:
         multioutput = 'raw_values'
     return [
         Evaluator.evaluate(m,
                            y_unscale,
                            y_pred_unscale,
                            multioutput=multioutput) for m in metric
     ]
Example #5
    def evaluate(self, x, y, metrics=['mse']):
        """
        Evaluate on the prediction results and y. We predict horizon time-points ahead of the input
        x in fit_eval before evaluation, where the horizon length equals the second dimension size
        of y.
        :param x: input data used for prediction.
        :param y: target. We interpret the second dimension of y as the horizon length for
            evaluation.
        :param metrics: a list of metrics, each given as a string name or a callable
        :return: a list of metric evaluation results
        """
        if x is None:
            raise ValueError("Input invalid x of None")
        if y is None:
            raise ValueError("Input invalid y of None")
        if self.model is None:
            raise Exception("Needs to call fit_eval or restore first before calling predict")

        if isinstance(y, pd.DataFrame):
            y = y.values
        self.model.n_jobs = self.n_jobs
        y_pred = self.predict(x)

        result_list = []
        for metric in metrics:
            if callable(metric):
                result_list.append(metric(y, y_pred))
            else:
                result_list.append(Evaluator.evaluate(metric, y, y_pred))
        return result_list
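As a sketch of the callable branch above (the `model` object and the input arrays are assumptions), any function taking (y_true, y_pred) arrays can be mixed with string metric names:

import numpy as np

def max_abs_error(y_true, y_pred):
    # custom callable metric: receives the same arrays Evaluator.evaluate would get
    return float(np.max(np.abs(y_true - y_pred)))

# hypothetical call, assuming `model` exposes the evaluate method above:
# mse_value, max_err = model.evaluate(x, y, metrics=["mse", max_abs_error])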
Example #6
 def evaluate(self, x, y, metrics=['mse']):
     """
     Evaluate on x, y
     :param x: input
     :param y: target
     :param metrics: a list of metrics in string format
     :return: a list of metric evaluation results
     """
     y_pred = self.predict(x)
     return [Evaluator.evaluate(m, y, y_pred) for m in metrics]
Example #7
 def evaluate(self, x, y, metric=['mse']):
     """
     Evaluate on x, y
     :param x: input
     :param y: target
     :param metric: a list of metrics in string format
     :return: a list of metric evaluation results
     """
     y_pred = self.predict(x)
     # y = np.squeeze(y, axis=2)
     if self.target_col_num == 1:
         return [Evaluator.evaluate(m, y, y_pred) for m in metric]
     else:
         return [
             np.array([
                 Evaluator.evaluate(m, y[:, i, :], y_pred[:, i, :])
                 for i in range(self.future_seq_len)
             ]) for m in metric
         ]
Example #8
 def evaluate(self, x, y, metrics=['mse']):
     """
     Evaluate on x, y
     :param x: input
     :param y: target
     :param metrics: a list of metrics in string format
     :return: a list of metric evaluation results
     """
     y_pred = self.predict(x)
     if y_pred.shape[1] == 1:
         multioutput = 'uniform_average'
     else:
         multioutput = 'raw_values'
     # y = np.squeeze(y, axis=2)
     return [Evaluator.evaluate(m, y, y_pred, multioutput=multioutput) for m in metrics]
Example #9
    def evaluate(self,
                 x,
                 y,
                 metrics=['mse'],
                 multioutput="raw_values",
                 batch_size=32):
        # reshape 1dim input
        x = self._reshape_input(x)
        y = self._reshape_input(y)

        yhat = self.predict(x, batch_size=batch_size)
        eval_result = [
            Evaluator.evaluate(m,
                               y_true=y,
                               y_pred=yhat,
                               multioutput=multioutput) for m in metrics
        ]
        return eval_result
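For reference, the multioutput flag decides whether the result keeps one value per output column or is averaged into a single number; a small standalone sketch with synthetic arrays (the shapes and values are illustrative):

import numpy as np
from bigdl.orca.automl.metrics import Evaluator

# synthetic 2-step horizon: 4 samples, 2 output time-points
y_true = np.array([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]])
y_pred = y_true + 0.1

per_step = Evaluator.evaluate("mse", y_true=y_true, y_pred=y_pred, multioutput="raw_values")
averaged = Evaluator.evaluate("mse", y_true=y_true, y_pred=y_pred, multioutput="uniform_average")
print(per_step, averaged)  # per-column errors vs. one averaged value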
Example #10
    def evaluate(self, x=None, y=None, metrics=None, target_covariates=None,
                 target_dti=None, num_workers=None):
        """
        Evaluate on the prediction results and y. We predict horizon time-points ahead of the input
        x in fit_eval before evaluation, where the horizon length equals the second dimension size
        of y.
        :param x: We don't support input x currently.
        :param y: target. We interpret the second dimension of y as the horizon length for
            evaluation.
        :param metrics: a list of metrics in string format
        :param target_covariates: covariates corresponding to target_value.
            2-D ndarray or None.
            The shape of the ndarray should be (r, horizon), where r is the number of covariates.
            Global covariates for all time series. If None, only the default time covariates will
            be used when use_time is True. Otherwise, the time covariates used are the stack of
            the input covariates and the default time covariates.
        :param target_dti: dti corresponding to target_value.
            DatetimeIndex or None.
            If None, use default fixed frequency DatetimeIndex generated with the last date of x in
            fit and freq.
        :param num_workers: the number of workers to use in evaluate. It defaults to 1.
        :return: a list of metric evaluation results
        """
        if x is not None:
            raise ValueError("We don't support input x directly.")
        if y is None:
            raise ValueError("Input invalid y of None")
        if self.model is None:
            raise Exception("Needs to call fit_eval or restore first before calling predict")
        if len(y.shape) == 1:
            y = np.expand_dims(y, axis=1)
            horizon = 1
        else:
            horizon = y.shape[1]
        result = self.predict(x=None, horizon=horizon,
                              future_covariates=target_covariates,
                              future_dti=target_dti,
                              num_workers=num_workers)

        if y.shape[1] == 1:
            multioutput = 'uniform_average'
        else:
            multioutput = 'raw_values'
        return [Evaluator.evaluate(m, y, result, multioutput=multioutput) for m in metrics]
Example #11
 def _validate(self, validation_loader, metric_name, metric_func=None):
     if not metric_name:
         assert metric_func, "You must input valid metric_func or metric_name"
         metric_name = metric_func.__name__
     self.model.eval()
     with torch.no_grad():
         yhat_list = []
         y_list = []
         for x_valid_batch, y_valid_batch in validation_loader:
             yhat_list.append(self.model(x_valid_batch).numpy())
             y_list.append(y_valid_batch.numpy())
         yhat = np.concatenate(yhat_list, axis=0)
         y = np.concatenate(y_list, axis=0)
     # val_loss = self.criterion(yhat, y)
     if metric_func:
         eval_result = metric_func(y, yhat)
     else:
         eval_result = Evaluator.evaluate(metric=metric_name,
                                          y_true=y,
                                          y_pred=yhat,
                                          multioutput='uniform_average')
     return {metric_name: eval_result}
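A minimal sketch of the metric_func fallback above (the `trainer` object and loader are assumptions): when metric_name is empty, any named callable taking (y_true, y_pred) numpy arrays is used and reported under its __name__.

import numpy as np

def mean_absolute_error(y_true, y_pred):
    # callable metric; the returned dict key becomes "mean_absolute_error"
    return float(np.mean(np.abs(y_true - y_pred)))

# hypothetical call, assuming `trainer` exposes the _validate helper above:
# result = trainer._validate(validation_loader, metric_name=None, metric_func=mean_absolute_error)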
Example #12
def train_example(args):
    auto_est = AutoEstimator.from_torch(
        model_creator=model_creator,
        optimizer="Adam",
        loss="BCELoss",
        logs_dir="/tmp/zoo_automl_logs",
        resources_per_trial={"cpu": args.cpus_per_trial},
        name="test_fit")
    train_data, val_data = get_train_val_data()
    auto_est.fit(data=train_data,
                 epochs=args.epochs,
                 validation_data=val_data,
                 metric="accuracy",
                 n_sampling=args.trials,
                 search_space=create_linear_search_space())
    # Choose the best model
    best_model = auto_est.get_best_model()

    y_hat = best_model(torch.from_numpy(val_data[0]).float()).detach().numpy()
    from bigdl.orca.automl.metrics import Evaluator
    accuracy = Evaluator.evaluate(metric="accuracy", y_true=val_data[1], y_pred=y_hat)
    print("Evaluate: accuracy is", accuracy)
Example #13
    def evaluate(self, target, x=None, metrics=['mse'], rolling=False):
        """
        Evaluate on the prediction results and target. We predict len(target) time-points forward
        from the training data before evaluation, so the horizon length equals the length of
        target.
        :param target: target for evaluation.
        :param x: ARIMA predicts the horizon steps forward from the training data,
            so x should be None as it is not used.
        :param metrics: a list of metrics in string format
        :param rolling: whether to use rolling prediction
        :return: a list of metric evaluation results
        """
        if x is not None:
            raise ValueError("We don't support input x currently")
        if target is None:
            raise ValueError("Input invalid target of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling evaluate"
            )

        forecasts = self.predict(horizon=len(target), rolling=rolling)

        return [Evaluator.evaluate(m, target, forecasts) for m in metrics]
Example #14
                         output_target_num=1,
                         past_seq_len=20,
                         hidden_dim=hp.grid_search([32, 64]),
                         layer_num=hp.randint(1, 3),
                         lr=hp.choice([0.01, 0.03, 0.1]),
                         dropout=hp.uniform(0.1, 0.2),
                         optimizer='Adam',
                         loss=torch.nn.MSELoss(),
                         metric="mse")

    x_train, y_train = tsdata_train.roll(lookback=20, horizon=1).to_numpy()
    x_val, y_val = tsdata_test.roll(lookback=20, horizon=1).to_numpy()
    x_test, y_test = tsdata_test.roll(lookback=20, horizon=1).to_numpy()

    auto_lstm.fit(data=(x_train, y_train),
                  epochs=args.epochs,
                  validation_data=(x_val, y_val))

    yhat = auto_lstm.predict(x_test)
    unscale_y_test = tsdata_test.unscale_numpy(y_test)
    unscale_yhat = tsdata_test.unscale_numpy(yhat)

    rmse, smape = [
        Evaluator.evaluate(m, y_true=unscale_y_test, y_pred=unscale_yhat)
        for m in ['rmse', 'smape']
    ]
    print(f'rmse is {np.mean(rmse)}')
    print(f'smape is {np.mean(smape)}')

    stop_orca_context()
Example #15
            "max_depth": hp.grid_search(list(max_depth_range)),
            "lr": hp.loguniform(1e-4, 1e-1),
            "min_child_weight": hp.choice(min_child_weight),
        }
        search_alg = None
        search_alg_params = None
        scheduler = None
        scheduler_params = None

    auto_xgb_reg = AutoXGBRegressor(cpus_per_trial=2,
                                    name="auto_xgb_regressor",
                                    **config)
    auto_xgb_reg.fit(data=(X_train, y_train),
                     validation_data=(X_val, y_val),
                     metric="rmse",
                     n_sampling=num_rand_samples,
                     search_space=search_space,
                     search_alg=search_alg,
                     search_alg_params=search_alg_params,
                     scheduler=scheduler,
                     scheduler_params=scheduler_params)

    print("Training completed.")
    best_model = auto_xgb_reg.get_best_model()
    y_hat = best_model.predict(X_val)

    from bigdl.orca.automl.metrics import Evaluator
    rmse = Evaluator.evaluate(metric="rmse", y_true=y_val, y_pred=y_hat)
    print(f"Evaluate: the square root of mean square error is {rmse:.2f}")
    stop_orca_context()
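For orientation, the truncated dict at the top of this example is a hyperparameter search space; a hedged standalone sketch of what a complete one might look like, built only from the hp primitives used in these examples (the import path and the exact keys are assumptions and depend on the model being tuned):

from bigdl.orca.automl import hp  # assumed import path for the hp module

search_space = {
    "n_estimators": hp.grid_search([50, 100]),  # try every listed value
    "max_depth": hp.grid_search([4, 6, 8]),
    "lr": hp.loguniform(1e-4, 1e-1),            # sample on a log scale
    "min_child_weight": hp.choice([1, 2, 3]),   # pick one of the listed values
}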
Example #16
        logger.info('Stopping context for yarn cluster and init context on local.')
        stop_orca_context()
        import ray
        ray.init(num_cpus=args.num_predict_cores)

    logger.info('Start prediction.')
    yhat = model.predict(horizon=horizon,
                         num_workers=args.num_predict_workers
                         if args.predict_local else args.num_workers)
    logger.info("Prediction ends")
    yhat = yhat["prediction"]
    target_value = dict({"y": target_data})

    # evaluate with prediction results
    from bigdl.orca.automl.metrics import Evaluator
    evaluate_mse = Evaluator.evaluate("mse", target_data, yhat)

    # You can also evaluate directly without prediction results.
    mse, smape = model.evaluate(target_value=target_value, metric=['mse', 'smape'],
                                num_workers=args.num_predict_workers if args.predict_local
                                else args.num_workers)
    print(f"Evaluation results:\nmse: {mse}, \nsmape: {smape}")
    logger.info("Evaluation ends")

    # incremental fitting
    logger.info("Start fit incremental")
    model.fit_incremental({'y': target_data})
    logger.info("Start evaluation after fit incremental")
    incr_target_value = dict({"y": incr_target_data})
    mse, smape = model.evaluate(target_value=incr_target_value, metric=['mse', 'smape'],
                                num_workers=args.num_predict_workers
Example #17
    tsdata_train, tsdata_test = get_tsdata()
    x_train, y_train = tsdata_train.to_numpy()
    x_test, y_test = tsdata_test.to_numpy()

    forecaster = Seq2SeqForecaster(past_seq_len=100,
                                   future_seq_len=10,
                                   input_feature_num=x_train.shape[-1],
                                   output_feature_num=2,
                                   metrics=['mse'],
                                   distributed=True,
                                   workers_per_node=args.workers_per_node,
                                   seed=0)

    forecaster.fit((x_train, y_train), epochs=args.epochs, batch_size=512)

    yhat = forecaster.predict(x_test)
    unscale_yhat = tsdata_test.unscale_numpy(yhat)
    unscale_y_test = tsdata_test.unscale_numpy(y_test)
    rmse, smape = [
        Evaluator.evaluate(m,
                           y_true=unscale_y_test,
                           y_pred=unscale_yhat,
                           multioutput='raw_values')
        for m in ['rmse', 'smape']
    ]
    print(f'rmse is: {np.mean(rmse)}')
    print(f'smape is: {np.mean(smape):.4f}')

    stop_orca_context()