def test_evaluate_predict_future_more_1(self):
    target_col = "values"
    metrics = ["mse", "r2"]
    future_seq_len = np.random.randint(2, 6)
    train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col)
    pipeline = tsp.fit(train_df, test_df)
    mse, rs = pipeline.evaluate(test_df, metrics=metrics)
    assert len(mse) == future_seq_len
    assert len(rs) == future_seq_len
    y_pred = pipeline.predict(test_df)
    assert y_pred.shape == (test_sample_num - default_past_seq_len + 1,
                            future_seq_len + 1)

    y_pred_df = pipeline.predict(test_df[:-future_seq_len])
    columns = ["{}_{}".format(target_col, i) for i in range(future_seq_len)]
    y_pred_value = y_pred_df[columns].values

    y_df = test_df[default_past_seq_len:]
    y_value = TimeSequenceFeatureTransformer()._roll_test(y_df[target_col], future_seq_len)

    mse_pred_eval, rs_pred_eval = [Evaluator.evaluate(m, y_value, y_pred_value)
                                   for m in metrics]
    mse_eval, rs_eval = pipeline.evaluate(test_df, metrics)
    assert_array_almost_equal(mse_pred_eval, mse_eval, decimal=2)
    assert_array_almost_equal(rs_pred_eval, rs_eval, decimal=2)
def evaluate_with_onnx(self, data, metrics=['mse'], multioutput="uniform_average", batch_size=32):
    '''
    Evaluate the time series pipeline with ONNX.

    :param data: data can be a TSDataset or a data creator (data creators will be
           supported in the future). The TSDataset should follow the same operations
           as the training TSDataset used in AutoTSEstimator.fit.
    :param metrics: list. The evaluation metric names to optimize, e.g. ["mse"].
    :param multioutput: Defines aggregating of multiple output values.
           String in ['raw_values', 'uniform_average']. The value defaults to
           'uniform_average'.
    :param batch_size: predict batch_size. The process costs more time if batch_size
           is small, while it costs less memory. The parameter is only effective when
           data is a TSDataset. The value defaults to 32.
    '''
    # predict with onnx
    x, y = self._tsdataset_to_numpy(data, is_predict=False)
    yhat = self._best_model.predict_with_onnx(x, batch_size=batch_size)
    yhat = self._tsdataset_unscale(yhat)
    # unscale
    y = self._tsdataset_unscale(y)
    # evaluate
    eval_result = [Evaluator.evaluate(m, y_true=y, y_pred=yhat, multioutput=multioutput)
                   for m in metrics]
    return eval_result
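A minimal usage sketch for this method, under the assumption that ts_pipeline is the fitted pipeline returned by AutoTSEstimator.fit and tsdata_test is a TSDataset preprocessed the same way as the training data (both variable names are placeholders, not taken from the source):

# Hypothetical usage sketch: ts_pipeline and tsdata_test are assumed placeholders.
mse, smape = ts_pipeline.evaluate_with_onnx(tsdata_test,
                                            metrics=["mse", "smape"],
                                            multioutput="uniform_average",
                                            batch_size=32)
print(f"onnx evaluation -> mse: {mse}, smape: {smape}")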
def evaluate(self, target, data=None, metrics=['mse']):
    """
    Evaluate on the prediction results. We predict horizon time-points ahead of the
    input data in fit_eval before evaluation, where the horizon length equals the
    second dimension size of target.

    :param target: target for evaluation. A dataframe with 2 columns, where column
           'ds' indicates the dates and column 'y' indicates the target values.
    :param data: Prophet predicts the horizon steps forward from the training data,
           so data should be None as it is not used.
    :param metrics: a list of metrics in string format
    :return: a list of metric evaluation results
    """
    if data is not None:
        raise ValueError("We don't support input data currently")
    if target is None:
        raise ValueError("Input invalid target of None")
    if self.model is None:
        raise Exception("Needs to call fit_eval or restore first before calling evaluate")
    target_pred = self.model.predict(target)
    return [Evaluator.evaluate(m, target.y.values, target_pred.yhat.values)
            for m in metrics]
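To make the expected target format concrete, the sketch below builds a two-column 'ds'/'y' dataframe; model stands for an already fitted instance of this Prophet wrapper and is an assumed placeholder, as is the random data:

import numpy as np
import pandas as pd

# Assumed example data: 'ds' holds the dates, 'y' holds the target values.
target_df = pd.DataFrame({
    "ds": pd.date_range("2021-01-01", periods=30, freq="D"),
    "y": np.random.rand(30),
})
# 'model' is a hypothetical, already fitted instance of this wrapper.
mse, smape = model.evaluate(target=target_df, metrics=["mse", "smape"])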
def evaluate(self, df, metric=['mse']):
    """
    Evaluate on an input dataframe.

    :param df: input dataframe holding both the features and the target column
    :param metric: a list of metrics in string format
    :return: a list of metric evaluation results
    """
    if isinstance(metric, str):
        metric = [metric]
    x, y = self._process_data(df, mode="val")
    y_pred = self.model.predict(x)
    y_unscale, y_pred_unscale = self.ft.post_processing(df, y_pred, is_train=True)
    if len(y_pred.shape) > 1 and y_pred.shape[1] == 1:
        multioutput = 'uniform_average'
    else:
        multioutput = 'raw_values'
    return [Evaluator.evaluate(m, y_unscale, y_pred_unscale, multioutput=multioutput)
            for m in metric]
def evaluate(self, x, y, metrics=['mse']):
    """
    Evaluate on the prediction results and y. We predict horizon time-points ahead of
    the input x in fit_eval before evaluation, where the horizon length equals the
    second dimension size of y.

    :param x: input feature data used for prediction before evaluation.
    :param y: target. We interpret the second dimension of y as the horizon length
           for evaluation.
    :param metrics: a list of metrics in string format, or callables taking
           (y_true, y_pred)
    :return: a list of metric evaluation results
    """
    if x is None:
        raise ValueError("Input invalid x of None")
    if y is None:
        raise ValueError("Input invalid y of None")
    if self.model is None:
        raise Exception("Needs to call fit_eval or restore first before calling evaluate")
    if isinstance(y, pd.DataFrame):
        y = y.values
    self.model.n_jobs = self.n_jobs
    y_pred = self.predict(x)
    result_list = []
    for metric in metrics:
        if callable(metric):
            # a user-defined callable metric takes (y_true, y_pred)
            result_list.append(metric(y, y_pred))
        else:
            result_list.append(Evaluator.evaluate(metric, y, y_pred))
    return result_list
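Because the loop above accepts either metric names or callables, a custom metric function can be mixed with built-in names. A sketch assuming model is a fitted instance of this wrapper and x_val, y_val are held-out numpy arrays (all three names are placeholders):

import numpy as np

def mean_absolute_error(y_true, y_pred):
    # custom callable metric: takes (y_true, y_pred) and returns a scalar
    return float(np.mean(np.abs(y_true - y_pred)))

# 'model', 'x_val' and 'y_val' are hypothetical placeholders.
mse_result, mae_result = model.evaluate(x_val, y_val,
                                        metrics=["mse", mean_absolute_error])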
def evaluate(self, x, y, metric=['mse']):
    """
    Evaluate on x, y.

    :param x: input
    :param y: target
    :param metric: a list of metrics in string format
    :return: a list of metric evaluation results
    """
    y_pred = self.predict(x)
    # y = np.squeeze(y, axis=2)
    if self.target_col_num == 1:
        return [Evaluator.evaluate(m, y, y_pred) for m in metric]
    else:
        return [np.array([Evaluator.evaluate(m, y[:, i, :], y_pred[:, i, :])
                          for i in range(self.future_seq_len)])
                for m in metric]
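In the multi-target branch, y and y_pred are 3-D arrays of shape (samples, future_seq_len, target_col_num) and each future step is scored separately. A small standalone sketch of that slicing using Evaluator directly (the shapes and random data are illustrative assumptions):

import numpy as np
from zoo.orca.automl.metrics import Evaluator

future_seq_len, target_col_num = 3, 2                     # illustrative sizes
y = np.random.rand(100, future_seq_len, target_col_num)
y_pred = np.random.rand(100, future_seq_len, target_col_num)

# one mse result per future step, mirroring the else-branch above
per_step_mse = np.array([Evaluator.evaluate("mse", y[:, i, :], y_pred[:, i, :])
                         for i in range(future_seq_len)])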
def evaluate(self, x, y, metrics=['mse'], multioutput="raw_values", batch_size=32):
    # reshape 1-dim input
    x = self._reshape_input(x)
    y = self._reshape_input(y)

    yhat = self.predict(x, batch_size=batch_size)
    eval_result = [Evaluator.evaluate(m, y_true=y, y_pred=yhat, multioutput=multioutput)
                   for m in metrics]
    return eval_result
def evaluate(self, x, y, metrics=['mse']):
    """
    Evaluate on x, y.

    :param x: input
    :param y: target
    :param metrics: a list of metrics in string format
    :return: a list of metric evaluation results
    """
    y_pred = self.predict(x)
    return [Evaluator.evaluate(m, y, y_pred) for m in metrics]
def evaluate(self, x=None, y=None, metrics=None,
             target_covariates=None, target_dti=None, num_workers=None):
    """
    Evaluate on the prediction results and y. We predict horizon time-points ahead of
    the input x in fit_eval before evaluation, where the horizon length equals the
    second dimension size of y.

    :param x: We don't support input x currently.
    :param y: target. We interpret the second dimension of y as the horizon length
           for evaluation.
    :param metrics: a list of metrics in string format
    :param target_covariates: covariates corresponding to target_value.
           2-D ndarray or None. The shape of the ndarray should be (r, horizon),
           where r is the number of covariates. Global covariates for all time series.
           If None, only the default time covariates will be used while use_time is
           True. If not None, the time covariates used are the stack of the input
           covariates and the default time covariates.
    :param target_dti: dti corresponding to target_value. DatetimeIndex or None.
           If None, use the default fixed-frequency DatetimeIndex generated with the
           last date of x in fit and freq.
    :param num_workers: the number of workers to use in evaluate. It defaults to 1.
    :return: a list of metric evaluation results
    """
    if x is not None:
        raise ValueError("We don't support input x directly.")
    if y is None:
        raise ValueError("Input invalid y of None")
    if self.model is None:
        raise Exception("Needs to call fit_eval or restore first before calling evaluate")
    if len(y.shape) == 1:
        y = np.expand_dims(y, axis=1)
        horizon = 1
    else:
        horizon = y.shape[1]
    result = self.predict(x=None, horizon=horizon,
                          future_covariates=target_covariates,
                          future_dti=target_dti,
                          num_workers=num_workers)
    if y.shape[1] == 1:
        multioutput = 'uniform_average'
    else:
        multioutput = 'raw_values'
    return [Evaluator.evaluate(m, y, result, multioutput=multioutput) for m in metrics]
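To make the covariates contract concrete, a hedged sketch of evaluating with two global covariates over the target horizon; the array contents and the model variable are assumptions, not taken from the source:

import numpy as np
import pandas as pd

horizon = 24
y_target = np.random.rand(1, horizon)        # second dimension = horizon
covariates = np.random.rand(2, horizon)      # shape (r, horizon) with r = 2 covariates
dti = pd.date_range("2021-06-01", periods=horizon, freq="H")

# 'model' is a hypothetical, already fitted instance of this forecaster wrapper.
smape_result = model.evaluate(y=y_target, metrics=["smape"],
                              target_covariates=covariates,
                              target_dti=dti)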
def evaluate(self, x, y, metrics=['mse']):
    """
    Evaluate on x, y.

    :param x: input
    :param y: target
    :param metrics: a list of metrics in string format
    :return: a list of metric evaluation results
    """
    y_pred = self.predict(x)
    if y_pred.shape[1] == 1:
        multioutput = 'uniform_average'
    else:
        multioutput = 'raw_values'
    # y = np.squeeze(y, axis=2)
    return [Evaluator.evaluate(m, y, y_pred, multioutput=multioutput) for m in metrics]
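The multioutput switch above mirrors how Evaluator.evaluate aggregates multi-column outputs. A small standalone sketch contrasting the two modes (random data, purely illustrative):

import numpy as np
from zoo.orca.automl.metrics import Evaluator

y_true = np.random.rand(50, 4)    # 4 output columns
y_pred = np.random.rand(50, 4)

per_column = Evaluator.evaluate("mse", y_true, y_pred, multioutput="raw_values")
averaged = Evaluator.evaluate("mse", y_true, y_pred, multioutput="uniform_average")
print(per_column)   # one mse per output column
print(averaged)     # single aggregated mse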
def _validate(self, validation_loader, metric_name, metric_func=None):
    if not metric_name:
        assert metric_func, "You must input valid metric_func or metric_name"
        metric_name = metric_func.__name__
    self.model.eval()
    with torch.no_grad():
        yhat_list = []
        y_list = []
        for x_valid_batch, y_valid_batch in validation_loader:
            yhat_list.append(self.model(x_valid_batch).numpy())
            y_list.append(y_valid_batch.numpy())
        yhat = np.concatenate(yhat_list, axis=0)
        y = np.concatenate(y_list, axis=0)
    # val_loss = self.criterion(yhat, y)
    if metric_func:
        eval_result = metric_func(y, yhat)
    else:
        eval_result = Evaluator.evaluate(metric=metric_name,
                                         y_true=y,
                                         y_pred=yhat,
                                         multioutput='uniform_average')
    return {metric_name: eval_result}
def evaluate(self, target, x=None, metrics=['mse'], rolling=False):
    """
    Evaluate on the prediction results and y. We predict horizon time-points ahead of
    the input x in fit_eval before evaluation, where the horizon length equals the
    second dimension size of y.

    :param target: target for evaluation.
    :param x: ARIMA predicts the horizon steps forward from the training data,
           so x should be None as it is not used.
    :param metrics: a list of metrics in string format
    :param rolling: whether to use rolling prediction
    :return: a list of metric evaluation results
    """
    if x is not None:
        raise ValueError("We don't support input x currently")
    if target is None:
        raise ValueError("Input invalid target of None")
    if self.model is None:
        raise Exception("Needs to call fit_eval or restore first before calling evaluate")

    forecasts = self.predict(horizon=len(target), rolling=rolling)

    return [Evaluator.evaluate(m, target, forecasts) for m in metrics]
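A hedged usage sketch for the rolling flag, assuming arima is a fitted instance of this ARIMA wrapper and y_test is a 1-D array of held-out target values (both names are placeholders):

# 'arima' and 'y_test' are hypothetical placeholders.
mse_static, = arima.evaluate(target=y_test, metrics=["mse"])                  # one-shot forecast
mse_rolling, = arima.evaluate(target=y_test, metrics=["mse"], rolling=True)   # rolling prediction
print(f"mse (static): {mse_static}, mse (rolling): {mse_rolling}")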
def train_example(args):
    auto_est = AutoEstimator.from_torch(model_creator=model_creator,
                                        optimizer="Adam",
                                        loss="BCELoss",
                                        logs_dir="/tmp/zoo_automl_logs",
                                        resources_per_trial={"cpu": args.cpus_per_trial},
                                        name="test_fit")
    train_data, val_data = get_train_val_data()
    auto_est.fit(data=train_data,
                 epochs=args.epochs,
                 validation_data=val_data,
                 metric="accuracy",
                 n_sampling=args.trials,
                 search_space=create_linear_search_space())
    # Choose the best model
    best_model = auto_est.get_best_model()

    y_hat = best_model(torch.from_numpy(val_data[0]).float()).detach().numpy()
    from zoo.orca.automl.metrics import Evaluator
    accuracy = Evaluator.evaluate(metric="accuracy",
                                  y_true=val_data[1],
                                  y_pred=y_hat)
    print("Evaluate: accuracy is", accuracy)
else:
    recipe = XgbRegressorGridRandomRecipe(num_rand_samples=num_rand_samples,
                                          n_estimators=list(n_estimators_range),
                                          max_depth=list(max_depth_range))
    search_alg = None
    scheduler = None
    scheduler_params = None

auto_xgb_clf = AutoXGBClassifier(cpus_per_trial=4,
                                 name="auto_xgb_classifier",
                                 **config)

import time
start = time.time()
auto_xgb_clf.fit(data=(X_train, y_train),
                 validation_data=(X_val, y_val),
                 metric="error",
                 metric_mode="min",
                 n_sampling=recipe.num_samples,
                 search_space=recipe.search_space(),
                 search_alg=search_alg,
                 search_alg_params=None,
                 scheduler=scheduler,
                 scheduler_params=scheduler_params)
end = time.time()
print("elapse: ", (end - start), "s")

best_model = auto_xgb_clf.get_best_model()
y_hat = best_model.predict(X_val)
from zoo.orca.automl.metrics import Evaluator
accuracy = Evaluator.evaluate(metric="accuracy", y_true=y_val, y_pred=y_hat)
print("Evaluate: accuracy is", accuracy)
logger.info('Stopping context for yarn cluster and init context on local.')
stop_orca_context()
import ray
ray.init(num_cpus=args.num_predict_cores)

logger.info('Start prediction.')
yhat = model.predict(horizon=horizon,
                     num_workers=args.num_predict_workers
                     if args.predict_local else args.num_workers)
logger.info("Prediction ends")
yhat = yhat["prediction"]
target_value = dict({"y": target_data})

# evaluate with prediction results
from zoo.orca.automl.metrics import Evaluator
evaluate_mse = Evaluator.evaluate("mse", target_data, yhat)

# You can also evaluate directly without prediction results.
mse, smape = model.evaluate(target_value=target_value,
                            metric=['mse', 'smape'],
                            num_workers=args.num_predict_workers
                            if args.predict_local else args.num_workers)
print(f"Evaluation results:\nmse: {mse}, \nsmape: {smape}")
logger.info("Evaluation ends")

# incremental fitting
logger.info("Start fit incremental")
model.fit_incremental({'y': target_data})
logger.info("Start evaluation after fit incremental")
incr_target_value = dict({"y": incr_target_data})
mse, smape = model.evaluate(target_value=incr_target_value,
                            metric=['mse', 'smape'],
                            num_workers=args.num_predict_workers
                            if args.predict_local else args.num_workers)
    min_child_weight=min_child_weight)
search_alg = None
search_alg_params = None
scheduler = None
scheduler_params = None

auto_xgb_reg = AutoXGBRegressor(cpus_per_trial=2,
                                name="auto_xgb_regressor",
                                **config)
auto_xgb_reg.fit(data=(X_train, y_train),
                 validation_data=(X_val, y_val),
                 metric="rmse",
                 n_sampling=recipe.num_samples,
                 search_space=recipe.search_space(),
                 search_alg=search_alg,
                 search_alg_params=None,
                 scheduler=scheduler,
                 scheduler_params=scheduler_params)
print("Training completed.")

best_model = auto_xgb_reg.get_best_model()
y_hat = best_model.predict(X_val)
from zoo.orca.automl.metrics import Evaluator
rmse = Evaluator.evaluate(metric="rmse", y_true=y_val, y_pred=y_hat)
print(f"Evaluate: the square root of mean square error is {rmse:.2f}")

ray_ctx.stop()
sc.stop()
name="auto_lstm") auto_lstm.fit( data=get_data_creator(tsdata_train), epochs=args.epoch, batch_size=hp.choice([32, 64]), validation_data=get_data_creator(tsdata_valid), n_sampling=args.n_sampling, ) best_model = auto_lstm.get_best_model() best_config = auto_lstm.get_best_config() x, y = tsdata_test\ .roll(lookback=best_config["past_seq_len"], horizon=best_config["future_seq_len"])\ .to_numpy() yhat = best_model(torch.from_numpy(x).float()).detach().numpy() y_unscale = tsdata_test.unscale_numpy(y) yhat_unscale = tsdata_test.unscale_numpy(np.expand_dims(yhat, axis=1)) result = [ Evaluator.evaluate(m, y_true=y_unscale, y_pred=yhat_unscale, multioutput="uniform_average") for m in ['rmse', 'smape'] ] print(f'rmse is {result[0]}, sampe is {result[1]}') print(f'The hyperparameters of the model are {best_config}') stop_orca_context()
forecaster = Seq2SeqForecaster(past_seq_len=100,
                               future_seq_len=10,
                               input_feature_num=x_train.shape[-1],
                               output_feature_num=2,
                               metrics=['mse'],
                               distributed=True,
                               workers_per_node=args.workers_per_node,
                               seed=0)

forecaster.fit((x_train, y_train),
               epochs=args.epochs,
               batch_size=512 // (1 if not forecaster.distributed else args.workers_per_node))

yhat = forecaster.predict(x_test)
unscale_yhat = tsdata_test.unscale_numpy(yhat)
unscale_y_test = tsdata_test.unscale_numpy(y_test)
rmse, smape = [Evaluator.evaluate(m, y_true=unscale_y_test,
                                  y_pred=unscale_yhat, multioutput='raw_values')
               for m in ['rmse', 'smape']]
print(f'rmse is: {np.mean(rmse)}')
print(f'smape is: {np.mean(smape):.4f}')

stop_orca_context()
auto_lstm = AutoLSTM(input_feature_num=1,
                     output_target_num=1,
                     past_seq_len=20,
                     hidden_dim=hp.grid_search([32, 64]),
                     layer_num=hp.randint(1, 3),
                     lr=hp.choice([0.01, 0.03, 0.1]),
                     dropout=hp.uniform(0.1, 0.2),
                     optimizer='Adam',
                     loss=torch.nn.MSELoss(),
                     metric="mse")

x_train, y_train = tsdata_train.roll(lookback=20, horizon=1).to_numpy()
x_val, y_val = tsdata_test.roll(lookback=20, horizon=1).to_numpy()
x_test, y_test = tsdata_test.roll(lookback=20, horizon=1).to_numpy()

auto_lstm.fit(data=(x_train, y_train),
              epochs=args.epochs,
              validation_data=(x_val, y_val))
yhat = auto_lstm.predict(x_test)
unscale_y_test = tsdata_test.unscale_numpy(y_test)
unscale_yhat = tsdata_test.unscale_numpy(yhat)
rmse, smape = [Evaluator.evaluate(m, y_true=unscale_y_test, y_pred=unscale_yhat)
               for m in ['rmse', 'smape']]
print(f'rmse is {np.mean(rmse)}')
print(f'smape is {np.mean(smape)}')
stop_orca_context()