Example #1
# The helpers under test are private MLflow functions; the module path below is
# assumed from the MLflow source layout these test snippets come from.
import numpy as np
import pandas as pd

from mlflow.models.evaluation.default_evaluator import (
    _evaluate_custom_metric,
    _get_regressor_metrics,
)


def test_evaluate_custom_metric_success():
    eval_df = pd.DataFrame({
        "prediction": [1.2, 1.9, 3.2],
        "target": [1, 2, 3]
    })
    metrics = _get_regressor_metrics(eval_df["target"], eval_df["prediction"])

    def example_custom_metric(_, given_metrics):
        return {
            "example_count_times_1_point_5":
            given_metrics["example_count"] * 1.5,
            "sum_on_label_minus_5": given_metrics["sum_on_label"] - 5,
            "example_np_metric_1": np.float32(123.2),
            "example_np_metric_2": np.ulonglong(10000000),
        }

    # The leading 0 is the custom metric's index in the custom_metrics list,
    # which this version of the helper uses when building error messages.
    res_metrics, res_artifacts = _evaluate_custom_metric(
        0, example_custom_metric, eval_df, metrics)
    assert res_metrics == {
        "example_count_times_1_point_5": metrics["example_count"] * 1.5,
        "sum_on_label_minus_5": metrics["sum_on_label"] - 5,
        "example_np_metric_1": np.float32(123.2),
        "example_np_metric_2": np.ulonglong(10000000),
    }
    assert res_artifacts is None

    def example_custom_metric_with_artifacts(given_df, given_metrics):
        return (
            {
                "example_count_times_1_point_5":
                given_metrics["example_count"] * 1.5,
                "sum_on_label_minus_5": given_metrics["sum_on_label"] - 5,
                "example_np_metric_1": np.float32(123.2),
                "example_np_metric_2": np.ulonglong(10000000),
            },
            {
                "pred_target_abs_diff":
                np.abs(given_df["prediction"] - given_df["target"]),
                "example_dictionary_artifact": {
                    "a": 1,
                    "b": 2
                },
            },
        )

    res_metrics_2, res_artifacts_2 = _evaluate_custom_metric(
        0, example_custom_metric_with_artifacts, eval_df, metrics)
    assert res_metrics_2 == {
        "example_count_times_1_point_5": metrics["example_count"] * 1.5,
        "sum_on_label_minus_5": metrics["sum_on_label"] - 5,
        "example_np_metric_1": np.float32(123.2),
        "example_np_metric_2": np.ulonglong(10000000),
    }
    assert "pred_target_abs_diff" in res_artifacts_2
    assert res_artifacts_2["pred_target_abs_diff"].equals(
        np.abs(eval_df["prediction"] - eval_df["target"]))

    assert "example_dictionary_artifact" in res_artifacts_2
    assert res_artifacts_2["example_dictionary_artifact"] == {"a": 1, "b": 2}
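
In normal use a function with this (eval_df, builtin_metrics) signature is not called through the private helper but handed to mlflow.evaluate. Below is a minimal sketch of that wiring, assuming the MLflow release these tests target, where the custom_metrics argument accepts plain callables (newer releases use mlflow.models.make_metric instead); model_uri is a placeholder for a logged model.

# Sketch only: wiring a custom metric into the public evaluation API.
import mlflow
import pandas as pd

eval_data = pd.DataFrame({"feature": [0.1, 0.2, 0.3], "target": [1, 2, 3]})

def example_count_times_1_point_5(eval_df, builtin_metrics):
    # Same contract as above: return a dict of string metric names to numbers.
    return {"example_count_times_1_point_5": builtin_metrics["example_count"] * 1.5}

with mlflow.start_run():
    result = mlflow.evaluate(
        model_uri,                  # placeholder, e.g. "runs:/<run_id>/model"
        eval_data,                  # DataFrame with features and the target column
        targets="target",
        model_type="regressor",
        custom_metrics=[example_count_times_1_point_5],
    )
    # Custom metric values are merged into the regular evaluation metrics.
    print(result.metrics["example_count_times_1_point_5"])
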
Example #2
# Receives (fn, expectation) pairs from a @pytest.mark.parametrize decorator
# that is not part of this snippet (see the sketch after it).
def test_evaluate_custom_metric_lambda(fn, expectation):
    eval_df = pd.DataFrame({
        "prediction": [1.2, 1.9, 3.2],
        "target": [1, 2, 3]
    })
    metrics = _get_regressor_metrics(eval_df["target"], eval_df["prediction"])
    with expectation:
        _evaluate_custom_metric(0, fn, eval_df, metrics)
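
The parametrization itself is missing from the snippet. The decorator below is a sketch of the shape it would take; the concrete (fn, expectation) pairs are illustrative rather than the ones from the original test module, with contextlib.nullcontext standing in for the "no exception" case and the bad return value grounded in the format checks shown in Example #3.

# Sketch of the missing parametrization, placed directly above
# test_evaluate_custom_metric_lambda.
from contextlib import nullcontext as does_not_raise

import pytest

from mlflow.exceptions import MlflowException

@pytest.mark.parametrize(
    "fn, expectation",
    [
        # Well-formed metric function: no exception expected.
        (
            lambda eval_df, builtin_metrics: {"pred_sum": float(eval_df["prediction"].sum())},
            does_not_raise(),
        ),
        # Returns a bare number instead of a dict, which the helper rejects.
        (
            lambda eval_df, builtin_metrics: 3,
            pytest.raises(MlflowException),
        ),
    ],
)
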
Example #3
# Module path for the private helpers assumed as in Example #1.
import pandas as pd
import pytest

from mlflow.exceptions import MlflowException
from mlflow.models.evaluation.default_evaluator import (
    _CustomMetric,
    _evaluate_custom_metric,
    _get_regressor_metrics,
)


def test_evaluate_custom_metric_incorrect_return_formats():
    eval_df = pd.DataFrame({
        "prediction": [1.2, 1.9, 3.2],
        "target": [1, 2, 3]
    })
    metrics = _get_regressor_metrics(eval_df["target"], eval_df["prediction"])

    def dummy_fn(*_):
        pass

    with pytest.raises(
            MlflowException,
            match=f"'{dummy_fn.__name__}' (.*) returned None",
    ):
        _evaluate_custom_metric(_CustomMetric(dummy_fn, "dummy_fn", 0, ""),
                                eval_df, metrics)

    def incorrect_return_type_1(*_):
        return 3

    def incorrect_return_type_2(*_):
        return "stuff", 3

    for test_fn in (
            incorrect_return_type_1,
            incorrect_return_type_2,
    ):
        with pytest.raises(
                MlflowException,
                match=
                f"'{test_fn.__name__}' (.*) did not return in an expected format",
        ):
            _evaluate_custom_metric(
                _CustomMetric(test_fn, test_fn.__name__, 0, ""), eval_df,
                metrics)

    def non_str_metric_name(*_):
        return {123: 123, "a": 32.1, "b": 3}

    def non_numerical_metric_value(*_):
        return {"stuff": 12, "non_numerical_metric": "123"}

    for test_fn in (
            non_str_metric_name,
            non_numerical_metric_value,
    ):
        with pytest.raises(
                MlflowException,
                match=
                f"'{test_fn.__name__}' (.*) did not return metrics as a dictionary of "
                "string metric names with numerical values",
        ):
            _evaluate_custom_metric(
                _CustomMetric(test_fn, test_fn.__name__, 0, ""), eval_df,
                metrics)

    def non_str_artifact_name(*_):
        return {"a": 32.1, "b": 3}, {1: [1, 2, 3]}

    with pytest.raises(
            MlflowException,
            match=
            f"'{non_str_artifact_name.__name__}' (.*) did not return artifacts as a "
            "dictionary of string artifact names with their corresponding objects",
    ):
        _evaluate_custom_metric(
            _CustomMetric(non_str_artifact_name,
                          non_str_artifact_name.__name__, 0, ""),
            eval_df,
            metrics,
        )
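
For contrast with the failure cases above, here is a sketch of a return value that passes every check exercised by this test: string metric names mapped to numbers, and string artifact names mapped to arbitrary objects, mirroring the valid function in Example #1. The metric and artifact names are illustrative only.

def well_formed_metric(eval_df, builtin_metrics):
    # String metric names with numerical values...
    metrics = {"example_count_doubled": builtin_metrics["example_count"] * 2}
    # ...and string artifact names with their corresponding objects.
    artifacts = {"pred_target_abs_diff": (eval_df["prediction"] - eval_df["target"]).abs()}
    return metrics, artifacts
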